From: bachmanity1 Date: Tue, 10 Mar 2026 14:13:03 +0000 (+0900) Subject: mgr/cephadm: fix KeyError when host is removed during serve loop X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=f7cf8ce1e6576188841fda5bb9bf30624b092f21;p=ceph.git mgr/cephadm: fix KeyError when host is removed during serve loop Signed-off-by: bachmanity1 --- diff --git a/src/pybind/mgr/cephadm/inventory.py b/src/pybind/mgr/cephadm/inventory.py index aed6ba03efad..3fda8c0e5907 100644 --- a/src/pybind/mgr/cephadm/inventory.py +++ b/src/pybind/mgr/cephadm/inventory.py @@ -1331,7 +1331,7 @@ class HostCache(): if host in self.mgr.offline_hosts: dd.status = orchestrator.DaemonDescriptionStatus.error dd.status_desc = 'host is offline' - elif self.mgr.inventory._inventory[host].get("status", "").lower() == "maintenance": + elif self.mgr.inventory._inventory.get(host, {}).get("status", "").lower() == "maintenance": # We do not refresh daemons on hosts in maintenance mode, so stored daemon statuses # could be wrong. We must assume maintenance is working and daemons are stopped dd.status = orchestrator.DaemonDescriptionStatus.stopped diff --git a/src/pybind/mgr/cephadm/serve.py b/src/pybind/mgr/cephadm/serve.py index 3f1c63bcda11..22083ffbbacd 100644 --- a/src/pybind/mgr/cephadm/serve.py +++ b/src/pybind/mgr/cephadm/serve.py @@ -252,8 +252,11 @@ class CephadmServe: @forall_hosts def refresh(host: str) -> None: - # skip hosts that are in maintenance - they could be powered off - if self.mgr.inventory._inventory[host].get("status", "").lower() == "maintenance": + # skip hosts that were removed or are in maintenance - they could be powered off + host_info = self.mgr.inventory._inventory.get(host) + if host_info is None: + return + if host_info.get("status", "").lower() == "maintenance": return if self.mgr.use_agent: @@ -862,8 +865,13 @@ class CephadmServe: try: all_slots, slots_to_add, daemons_to_remove = ha.place() - daemons_to_remove = [d for d in daemons_to_remove if (d.hostname and self.mgr.inventory._inventory[d.hostname].get( - 'status', '').lower() not in ['maintenance', 'offline'] and d.hostname not in self.mgr.offline_hosts)] + daemons_to_remove = [ + d for d in daemons_to_remove if ( + d.hostname + and d.hostname in self.mgr.inventory._inventory + and self.mgr.inventory._inventory.get(d.hostname, {}).get( + 'status', '').lower() not in ['maintenance', 'offline'] + and d.hostname not in self.mgr.offline_hosts)] self.log.debug('Add %s, remove %s' % (slots_to_add, daemons_to_remove)) except OrchestratorError as e: msg = f'Failed to apply {spec.service_name()} spec {spec}: {str(e)}'