From: Guillaume Abrioux Date: Fri, 31 Mar 2023 09:27:13 +0000 (+0200) Subject: cephadm: osd replacement improvement X-Git-Tag: v17.2.7~380^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=e36aada6ace17d26db9f7b6a60edf9af93d1cc66;p=ceph.git cephadm: osd replacement improvement This adds a new parameter `--no-destroy` to the command `ceph orch osd rm`. By default, the command removes any VGs/LVs related to the OSD being removed. For specific workflows, it can be useful to preserve them, which is what `--no-destroy` does. Fixes: https://tracker.ceph.com/issues/59289 Signed-off-by: Guillaume Abrioux (cherry picked from commit ef810a4ebf91b538f778da4ca3ea0fdc2968e2fe) --- diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py index 9b708d52bc9..4e6dba9bc4f 100644 --- a/src/pybind/mgr/cephadm/module.py +++ b/src/pybind/mgr/cephadm/module.py @@ -3009,7 +3009,8 @@ Then run the following: def remove_osds(self, osd_ids: List[str], replace: bool = False, force: bool = False, - zap: bool = False) -> str: + zap: bool = False, + no_destroy: bool = False) -> str: """ Takes a list of OSDs and schedules them for removal. 
The function that takes care of the actual removal is @@ -3031,6 +3032,7 @@ Then run the following: replace=replace, force=force, zap=zap, + no_destroy=no_destroy, hostname=daemon.hostname, process_started_at=datetime_now(), remove_util=self.to_remove_osds.rm_util)) diff --git a/src/pybind/mgr/cephadm/services/osd.py b/src/pybind/mgr/cephadm/services/osd.py index 31771fb5fce..9f404646f38 100644 --- a/src/pybind/mgr/cephadm/services/osd.py +++ b/src/pybind/mgr/cephadm/services/osd.py @@ -542,9 +542,12 @@ class RemoveUtil(object): def zap_osd(self, osd: "OSD") -> str: "Zaps all devices that are associated with an OSD" if osd.hostname is not None: + cmd = ['--', 'lvm', 'zap', '--osd-id', str(osd.osd_id)] + if not osd.no_destroy: + cmd.append('--destroy') out, err, code = self.mgr.wait_async(CephadmServe(self.mgr)._run_cephadm( osd.hostname, 'osd', 'ceph-volume', - ['--', 'lvm', 'zap', '--destroy', '--osd-id', str(osd.osd_id)], + cmd, error_ok=True)) self.mgr.cache.invalidate_host_devices(osd.hostname) if code: @@ -608,7 +611,8 @@ class OSD: replace: bool = False, force: bool = False, hostname: Optional[str] = None, - zap: bool = False): + zap: bool = False, + no_destroy: bool = False): # the ID of the OSD self.osd_id = osd_id @@ -647,6 +651,8 @@ class OSD: # Whether devices associated with the OSD should be zapped (DATA ERASED) self.zap = zap + # Whether the VGs/LVs associated with the OSD should be preserved when zapping (i.e. `--destroy` is NOT passed to `ceph-volume lvm zap`). 
+ self.no_destroy = no_destroy def start(self) -> None: if self.started: diff --git a/src/pybind/mgr/orchestrator/_interface.py b/src/pybind/mgr/orchestrator/_interface.py index 58449f7dd07..a39a1ce1f3f 100644 --- a/src/pybind/mgr/orchestrator/_interface.py +++ b/src/pybind/mgr/orchestrator/_interface.py @@ -581,12 +581,14 @@ class Orchestrator(object): def remove_osds(self, osd_ids: List[str], replace: bool = False, force: bool = False, - zap: bool = False) -> OrchResult[str]: + zap: bool = False, + no_destroy: bool = False) -> OrchResult[str]: """ :param osd_ids: list of OSD IDs :param replace: marks the OSD as being destroyed. See :ref:`orchestrator-osd-replace` :param force: Forces the OSD removal process without waiting for the data to be drained first. :param zap: Zap/Erase all devices associated with the OSDs (DESTROYS DATA) + :param no_destroy: Do not destroy associated VGs/LVs with the OSD. .. note:: this can only remove OSDs that were successfully diff --git a/src/pybind/mgr/orchestrator/module.py b/src/pybind/mgr/orchestrator/module.py index 3c4274f43e2..f638b9e49de 100644 --- a/src/pybind/mgr/orchestrator/module.py +++ b/src/pybind/mgr/orchestrator/module.py @@ -966,9 +966,11 @@ Usage: osd_id: List[str], replace: bool = False, force: bool = False, - zap: bool = False) -> HandleCommandResult: + zap: bool = False, + no_destroy: bool = False) -> HandleCommandResult: """Remove OSD daemons""" - completion = self.remove_osds(osd_id, replace=replace, force=force, zap=zap) + completion = self.remove_osds(osd_id, replace=replace, force=force, + zap=zap, no_destroy=no_destroy) raise_if_exception(completion) return HandleCommandResult(stdout=completion.result_str()) diff --git a/src/pybind/mgr/rook/module.py b/src/pybind/mgr/rook/module.py index 370cb658219..b67349d1bff 100644 --- a/src/pybind/mgr/rook/module.py +++ b/src/pybind/mgr/rook/module.py @@ -625,7 +625,12 @@ class RookOrchestrator(MgrModule, orchestrator.Orchestrator): } self.set_store("drive_group_map", 
json.dumps(json_drive_group_map)) - def remove_osds(self, osd_ids: List[str], replace: bool = False, force: bool = False, zap: bool = False) -> OrchResult[str]: + def remove_osds(self, + osd_ids: List[str], + replace: bool = False, + force: bool = False, + zap: bool = False, + no_destroy: bool = False) -> OrchResult[str]: assert self._rook_cluster is not None if zap: raise RuntimeError("Rook does not support zapping devices during OSD removal.")