From: Kushal Deb Date: Mon, 20 Apr 2026 09:05:14 +0000 (+0530) Subject: mgr/cephadm: preserve in-flight D3N allocations during fresh deploy X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=c422083ab3f4aab87642e53caf970018a2c339e8;p=ceph.git mgr/cephadm: preserve in-flight D3N allocations during fresh deploy Avoid pruning per-(service, host) D3N device assignments when daemon details are not yet visible in the mgr cache. This keeps initial daemon-to-device mappings stable on fresh deployment so 1:1 allocation is preserved when free devices exist. Signed-off-by: Kushal Deb --- diff --git a/src/pybind/mgr/cephadm/services/cephadmservice.py b/src/pybind/mgr/cephadm/services/cephadmservice.py index c6705ca03790..a159b128df7d 100644 --- a/src/pybind/mgr/cephadm/services/cephadmservice.py +++ b/src/pybind/mgr/cephadm/services/cephadmservice.py @@ -1420,7 +1420,10 @@ class RgwService(CephService): raise OrchestratorError("missing host in daemon_spec") service_name = daemon_spec.service_name - daemon_details = self.mgr.cache.get_daemons_by_service(service_name) + daemon_details = [ + dd for dd in self.mgr.cache.get_daemons_by_service(service_name) + if dd.hostname == host + ] fs_type, size_bytes, devs = d3n_get_host_devs(d3n, host) device = alloc.plan_device_for_daemon(service_name, host, devs, daemon_spec.daemon_id, daemon_details) diff --git a/src/pybind/mgr/cephadm/services/rgw_d3n.py b/src/pybind/mgr/cephadm/services/rgw_d3n.py index 34bbbed01277..82ee7a3597df 100644 --- a/src/pybind/mgr/cephadm/services/rgw_d3n.py +++ b/src/pybind/mgr/cephadm/services/rgw_d3n.py @@ -21,7 +21,6 @@ class D3NDevicePlanner: mgr: "CephadmOrchestrator" def _d3n_fail_if_devs_used_by_other_rgw_service(self, host: str, devs: List[str], service_name: str) -> None: - logger.info("1361") wanted = set(devs) # check rgw daemons on this host @@ -63,15 +62,26 @@ class D3NDevicePlanner: current_daemon_id: str, key: tuple[str, str], ) -> None: + """ + Prune invalid and stale entries from the per-(service, host) D3N allocation map. + Keep in-flight allocations during fresh deployment to avoid losing initial + daemon-to-device assignments before daemons appear in the mgr cache. + """ invalid = [did for did, dev in alloc.items() if dev not in devs] for did in invalid: del alloc[did] - logger.debug(f"[D3N][alloc] prune-invalid: removed={invalid} devs={devs} alloc_now={alloc}") + + logger.debug( + f"[D3N][alloc] prune-invalid: key={key} removed={invalid} " + f"devs={devs} alloc_now={alloc}" + ) + if not daemon_details: - if alloc: - logger.info(f"[D3N][alloc] clear-stale: key={key} alloc_was={alloc}") - alloc.clear() + logger.debug( + f"[D3N][alloc] gc: key={key} no daemon_details yet; " + f"preserving in-flight alloc={alloc}" + ) return live_daemon_ids: set[str] = set() @@ -85,8 +95,9 @@ class D3NDevicePlanner: stale = [did for did in list(alloc.keys()) if did not in live_daemon_ids] for did in stale: del alloc[did] + logger.debug( - f"gc: key={key} live={sorted(live_daemon_ids)} " + f"[D3N][alloc] gc: key={key} live={sorted(live_daemon_ids)} " f"removed={stale} alloc_now={alloc}" )