From 8a32b79c0d5059873dbf5d35930ff6bc4cb66ff2 Mon Sep 17 00:00:00 2001 From: Joshua Blanch Date: Fri, 27 Mar 2026 18:43:22 +0000 Subject: [PATCH] mgr/cephadm: Fix infinite loop when OSD zap fails during removal Previously, an exception raised by osd.do_zap() propagated before the OSD was removed from the removal queue. This kept the OSD in the queue and caused the entire removal sequence to replay indefinitely on each serve iteration. Catch and log the zap failure so that the OSD is still removed from the queue. Fixes: https://tracker.ceph.com/issues/75744 Signed-off-by: Joshua Blanch --- src/pybind/mgr/cephadm/services/osd.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/pybind/mgr/cephadm/services/osd.py b/src/pybind/mgr/cephadm/services/osd.py index ace484cd60ef..803ca57fd7c6 100644 --- a/src/pybind/mgr/cephadm/services/osd.py +++ b/src/pybind/mgr/cephadm/services/osd.py @@ -937,10 +937,12 @@ class OSDRemovalQueue(object): logger.info(f"Successfully purged {osd} on {osd.hostname}") if osd.zap: - # throws an exception if the zap fails - logger.info(f"Zapping devices for {osd} on {osd.hostname}") - osd.do_zap() - logger.info(f"Successfully zapped devices for {osd} on {osd.hostname}") + try: + logger.info(f"Zapping devices for {osd} on {osd.hostname}") + osd.do_zap() + logger.info(f"Successfully zapped devices for {osd} on {osd.hostname}") + except Exception: + logger.exception(f"Failed to zap devices for {osd} on {osd.hostname}") self.mgr.cache.invalidate_host_devices(osd.hostname) logger.debug(f"Removing {osd} from the queue.") -- 2.47.3