git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/cephadm: preserve in-flight D3N allocations during fresh deploy 66778/head
author Kushal Deb <Kushal.Deb@ibm.com>
Mon, 20 Apr 2026 09:05:14 +0000 (14:35 +0530)
committer Kushal Deb <Kushal.Deb@ibm.com>
Mon, 20 Apr 2026 09:08:50 +0000 (14:38 +0530)
Avoid pruning per-(service, host) D3N device assignments when
daemon details are not yet visible in the mgr cache.

This keeps initial daemon-to-device mappings stable on fresh
deployment so 1:1 allocation is preserved when free devices exist.

Signed-off-by: Kushal Deb <Kushal.Deb@ibm.com>
src/pybind/mgr/cephadm/services/cephadmservice.py
src/pybind/mgr/cephadm/services/rgw_d3n.py

index c6705ca03790d4c0252bc7350ab39978b3f5eb9c..a159b128df7d1c91b15bcd3cf4079cad8480ebc4 100644 (file)
@@ -1420,7 +1420,10 @@ class RgwService(CephService):
             raise OrchestratorError("missing host in daemon_spec")
 
         service_name = daemon_spec.service_name
-        daemon_details = self.mgr.cache.get_daemons_by_service(service_name)
+        daemon_details = [
+            dd for dd in self.mgr.cache.get_daemons_by_service(service_name)
+            if dd.hostname == host
+        ]
 
         fs_type, size_bytes, devs = d3n_get_host_devs(d3n, host)
         device = alloc.plan_device_for_daemon(service_name, host, devs, daemon_spec.daemon_id, daemon_details)
index 34bbbed0127741c712f69569a3595b7b1c55fa52..82ee7a3597df37c9a8815ee143929fc34016fb3c 100644 (file)
@@ -21,7 +21,6 @@ class D3NDevicePlanner:
     mgr: "CephadmOrchestrator"
 
     def _d3n_fail_if_devs_used_by_other_rgw_service(self, host: str, devs: List[str], service_name: str) -> None:
-        logger.info("1361")
         wanted = set(devs)
 
         # check rgw daemons on this host
@@ -63,15 +62,26 @@ class D3NDevicePlanner:
             current_daemon_id: str,
             key: tuple[str, str],
     ) -> None:
+        """
+        Prune invalid and stale entries from the per-(service, host) D3N allocation map.
 
+        Keep in-flight allocations during fresh deployment to avoid losing initial
+        daemon-to-device assignments before daemons appear in the mgr cache.
+        """
         invalid = [did for did, dev in alloc.items() if dev not in devs]
         for did in invalid:
             del alloc[did]
-        logger.debug(f"[D3N][alloc] prune-invalid: removed={invalid} devs={devs} alloc_now={alloc}")
+
+        logger.debug(
+            f"[D3N][alloc] prune-invalid: key={key} removed={invalid} "
+            f"devs={devs} alloc_now={alloc}"
+        )
+
         if not daemon_details:
-            if alloc:
-                logger.info(f"[D3N][alloc] clear-stale: key={key} alloc_was={alloc}")
-                alloc.clear()
+            logger.debug(
+                f"[D3N][alloc] gc: key={key} no daemon_details yet; "
+                f"preserving in-flight alloc={alloc}"
+            )
             return
 
         live_daemon_ids: set[str] = set()
@@ -85,8 +95,9 @@ class D3NDevicePlanner:
         stale = [did for did in list(alloc.keys()) if did not in live_daemon_ids]
         for did in stale:
             del alloc[did]
+
         logger.debug(
-            f"gc: key={key} live={sorted(live_daemon_ids)} "
+            f"[D3N][alloc] gc: key={key} live={sorted(live_daemon_ids)} "
             f"removed={stale} alloc_now={alloc}"
         )