From: Joshua Blanch Date: Fri, 27 Mar 2026 18:43:22 +0000 (+0000) Subject: mgr/cephadm: Fixes infinite loop when OSD zap fails during removal X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=8a32b79c0d5059873dbf5d35930ff6bc4cb66ff2;p=ceph.git mgr/cephadm: Fixes infinite loop when OSD zap fails during removal Previously, an exception raised by osd.do_zap() propagated before the OSD was removed from the removal queue. This kept the OSD in the queue and caused the entire removal sequence to replay indefinitely on each serve iteration. A zap failure is now caught and logged so that the OSD is still removed from the queue. Fixes: https://tracker.ceph.com/issues/75744 Signed-off-by: Joshua Blanch --- diff --git a/src/pybind/mgr/cephadm/services/osd.py b/src/pybind/mgr/cephadm/services/osd.py index ace484cd60ef..803ca57fd7c6 100644 --- a/src/pybind/mgr/cephadm/services/osd.py +++ b/src/pybind/mgr/cephadm/services/osd.py @@ -937,10 +937,12 @@ class OSDRemovalQueue(object): logger.info(f"Successfully purged {osd} on {osd.hostname}") if osd.zap: - # throws an exception if the zap fails - logger.info(f"Zapping devices for {osd} on {osd.hostname}") - osd.do_zap() - logger.info(f"Successfully zapped devices for {osd} on {osd.hostname}") + try: + logger.info(f"Zapping devices for {osd} on {osd.hostname}") + osd.do_zap() + logger.info(f"Successfully zapped devices for {osd} on {osd.hostname}") + except Exception: + logger.exception(f"Failed to zap devices for {osd} on {osd.hostname}") self.mgr.cache.invalidate_host_devices(osd.hostname) logger.debug(f"Removing {osd} from the queue.")