git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/cephadm: Fixes infinite loop when OSD zap fails during removal 68061/head
author: Joshua Blanch <joshua.blanch@clyso.com>
Fri, 27 Mar 2026 18:43:22 +0000 (18:43 +0000)
committer: Joshua Blanch <joshua.blanch@clyso.com>
Fri, 27 Mar 2026 18:43:22 +0000 (18:43 +0000)
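Previously, when zapping an OSD's devices failed during removal, the
exception raised by do_zap() propagated before the OSD was removed from
the removal queue.
This kept the OSD in the queue and caused the entire removal
sequence to replay indefinitely on each serve iteration.
The zap failure is now caught and logged, so the OSD is still removed
from the queue.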

Fixes: https://tracker.ceph.com/issues/75744
Signed-off-by: Joshua Blanch <joshua.blanch@clyso.com>
src/pybind/mgr/cephadm/services/osd.py

index ace484cd60efb21412951120c4d7df6cefe46896..803ca57fd7c6310781832019e193658028c9c491 100644 (file)
@@ -937,10 +937,12 @@ class OSDRemovalQueue(object):
                 logger.info(f"Successfully purged {osd} on {osd.hostname}")
 
             if osd.zap:
-                # throws an exception if the zap fails
-                logger.info(f"Zapping devices for {osd} on {osd.hostname}")
-                osd.do_zap()
-                logger.info(f"Successfully zapped devices for {osd} on {osd.hostname}")
+                try:
+                    logger.info(f"Zapping devices for {osd} on {osd.hostname}")
+                    osd.do_zap()
+                    logger.info(f"Successfully zapped devices for {osd} on {osd.hostname}")
+                except Exception:
+                    logger.exception(f"Failed to zap devices for {osd} on {osd.hostname}")
             self.mgr.cache.invalidate_host_devices(osd.hostname)
             logger.debug(f"Removing {osd} from the queue.")