]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
cephadm: wait for latest osd map after ceph-volume before OSD deploy 68367/head
authorGuillaume Abrioux <gabrioux@ibm.com>
Fri, 10 Apr 2026 14:37:58 +0000 (16:37 +0200)
committerGuillaume Abrioux <gabrioux@ibm.com>
Tue, 14 Apr 2026 07:53:04 +0000 (09:53 +0200)
After ceph-volume creates an OSD, the mgr's cached osd map can
lag behind the monitors, so get_osd_uuid_map() misses the new osd
id and deploy_osd_daemons_for_existing_osds() skips deploying the
cephadm daemon, reporting a misleading "Created no osd(s)" even
though the osd exists.

This behavior is often seen with raw devices (lvm list returns more quickly).

This also fixes get_osd_uuid_map(only_up=True): the previous code
never populated the map when 'only_up' was true.
Now only OSDs with 'up == 1' are included, so a newly created (but still
down) OSD is not treated as already present.

Fixes: https://tracker.ceph.com/issues/75965
Signed-off-by: Guillaume Abrioux <gabrioux@ibm.com>
src/pybind/mgr/cephadm/module.py
src/pybind/mgr/cephadm/services/osd.py
src/pybind/mgr/cephadm/tests/fixtures.py
src/pybind/mgr/cephadm/tests/test_cephadm.py

index 347305c649954ccdf4ac903df1b6f46a9a6104f0..d5d7817b3579f3da5fd5499ad283debe493af280 100644 (file)
@@ -2988,13 +2988,16 @@ Then run the following:
         osd_map = self.get('osd_map')
         r = {}
         for o in osd_map['osds']:
-            # only include OSDs that have ever started in this map.  this way
-            # an interrupted osd create can be repeated and succeed the second
-            # time around.
+            # when only_up, only include OSDs that are currently up, so we do not
+            # treat a "just created" (still down) osd as "already present" for
+            # deploy_osd_daemons_for_existing_osds().
             osd_id = o.get('osd')
             if osd_id is None:
                 raise OrchestratorError("Could not retrieve osd_id from osd_map")
-            if not only_up:
+            if only_up:
+                if o.get('up') == 1:
+                    r[str(osd_id)] = o.get('uuid', '')
+            else:
                 r[str(osd_id)] = o.get('uuid', '')
         return r
 
index ea78b5c38147bafc940cb581ffb14e576b277452..1c8bc5c6818cab8e6850cafa50a796414cc7ed15 100644 (file)
@@ -1,6 +1,6 @@
 import json
 import logging
-from asyncio import gather
+from asyncio import gather, to_thread
 from threading import Lock
 from typing import List, Dict, Any, Set, Tuple, cast, Optional, TYPE_CHECKING
 
@@ -116,6 +116,17 @@ class OSDService(CephService):
             replace_osd_ids = OsdIdClaims(self.mgr).filtered_by_host(host)
             assert replace_osd_ids is not None
 
+        # ceph-volume registers new OSDs with the monitor before returning.
+        # the mgr's view of the osd map can briefly lag, so get_osd_uuid_map()
+        # would miss the new id and we would skip deploying the cephadm
+        # daemon (misleading "Created no osd(s)" while the osd exists but is still down).
+        # wait_for_latest_osdmap() is synchronous:
+        # We need to run it in a thread pool so we do not block the cephadm asyncio event loop.
+        ret = await to_thread(self.mgr.rados.wait_for_latest_osdmap)
+        if ret < 0:
+            raise OrchestratorError(
+                'wait_for_latest_osdmap failed with %d' % ret)
+
         # check result: lvm
         osds_elems: dict = await CephadmServe(self.mgr)._run_cephadm_json(
             host, 'osd', 'ceph-volume',
index dda0c6720ac6cbda35b9dad29fc04d78d1a60f9e..92fa87ac82906d90c507e1f12e8a74d00342a850 100644 (file)
@@ -133,6 +133,11 @@ def with_cephadm_module(module_options=None, store=None):
         m.event_loop = MockEventLoopThread()
         m.tkey = NamedTemporaryFile(prefix='test-cephadm-identity-')
 
+        # librados wait_for_latest_osdmap() returns int (0 = success).
+        # Tests use a MagicMock for rados unless configured.
+        # OSD deploy compares ret < 0.
+        m.rados.wait_for_latest_osdmap.return_value = 0
+
         yield m
 
 
index 34b3a4e6083678d8bb78f3293ed977540b5349f0..264919cd0f95909c2ad220d6c6eb5257b354912a 100644 (file)
@@ -87,12 +87,15 @@ def with_daemon(cephadm_module: CephadmOrchestrator, spec: ServiceSpec, host: st
 
 @contextmanager
 def with_osd_daemon(cephadm_module: CephadmOrchestrator, _run_cephadm, host: str, osd_id: int, ceph_volume_lvm_list=None):
+    # OSD is in the cluster map but still down (no cephadm daemon yet). If up==1,
+    # get_osd_uuid_map(only_up=True) would list it in before_osd_uuid_map and
+    # deploy_osd_daemons_for_existing_osds would skip deploying the daemon.
     cephadm_module.mock_store_set('_ceph_get', 'osd_map', {
         'osds': [
             {
                 'osd': 1,
                 'up_from': 0,
-                'up': True,
+                'up': 0,
                 'uuid': 'uuid'
             }
         ]
@@ -147,6 +150,39 @@ def with_osd_daemon(cephadm_module: CephadmOrchestrator, _run_cephadm, host: str
 
 class TestCephadm(object):
 
+    def test_get_osd_uuid_map_includes_down_osds_when_only_up_false(self, cephadm_module):
+        cephadm_module.mock_store_set('_ceph_get', 'osd_map', {
+            'osds': [
+                {'osd': 0, 'up': 1, 'uuid': 'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa'},
+                {'osd': 1, 'up': 0, 'uuid': 'bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb'},
+            ]
+        })
+        m = cephadm_module.get_osd_uuid_map(only_up=False)
+        assert m == {
+            '0': 'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa',
+            '1': 'bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb',
+        }
+
+    def test_get_osd_uuid_map_only_up_includes_only_up_osds(self, cephadm_module):
+        cephadm_module.mock_store_set('_ceph_get', 'osd_map', {
+            'osds': [
+                {'osd': 0, 'up': 1, 'uuid': 'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa'},
+                {'osd': 1, 'up': 0, 'uuid': 'bbbbbbbb-bbbb-bbbb-bbbb-bbbbbbbbbbbb'},
+            ]
+        })
+        m = cephadm_module.get_osd_uuid_map(only_up=True)
+        assert m == {'0': 'aaaaaaaa-aaaa-aaaa-aaaa-aaaaaaaaaaaa'}
+
+    @mock.patch('cephadm.services.osd.to_thread', new_callable=mock.AsyncMock)
+    def test_deploy_osd_daemons_wait_for_latest_osdmap_failure(self, mock_to_thread, cephadm_module):
+        mock_to_thread.return_value = -5
+        with pytest.raises(OrchestratorError, match='wait_for_latest_osdmap failed'):
+            cephadm_module.wait_async(
+                cephadm_module.osd_service.deploy_osd_daemons_for_existing_osds(
+                    'test',
+                    DriveGroupSpec(service_type='osd', service_id=''),
+                    replace_osd_ids=[]))
+
     def test_get_unique_name(self, cephadm_module):
         # type: (CephadmOrchestrator) -> None
         existing = [
@@ -1576,6 +1612,13 @@ class TestCephadm(object):
                         dd2.status = DaemonDescriptionStatus.error
                         cephadm_module.cache.update_host_daemons(dd1.hostname, {dd1.name(): dd1})
                         cephadm_module.cache.update_host_daemons(dd2.hostname, {dd2.name(): dd2})
+                        # _check_for_moved_osds removes the stray duplicate only if
+                        # get_osd_by_id reports the osd as up
+                        cephadm_module.mock_store_set('_ceph_get', 'osd_map', {
+                            'osds': [
+                                {'osd': 1, 'up_from': 0, 'up': 1, 'uuid': 'uuid'}
+                            ]
+                        })
                         CephadmServe(cephadm_module)._check_for_moved_osds()
                         assert len(cephadm_module.cache.get_daemons()) == 1