]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
src/ceph-volume: fast device unavailable as error 67745/head
authorTimothy Q Nguyen <timqn22@gmail.com>
Wed, 11 Mar 2026 18:45:38 +0000 (11:45 -0700)
committerTimothy Q Nguyen <timqn22@gmail.com>
Thu, 19 Mar 2026 16:13:13 +0000 (09:13 -0700)
Normally, when fast devices are passed to the batch command but
no fast allocations can be found, the batch command does
nothing and returns an empty plan. This is problematic
because the empty return makes the failure silent,
which makes it hard to debug in certain scenarios. I propose
changing this to log an error and exit with a non-zero status
(exit(1)), and have made changes in osd.py to better log the
errors and process the exceptions. This shouldn't affect
existing processes much, and the change in osd.py ensures
that the raised errors will not interrupt the returned
output. I've also changed the unit tests to account for
this change.

Signed-off-by: Timothy Q Nguyen <timqn22@gmail.com>
src/ceph-volume/ceph_volume/devices/lvm/batch.py
src/ceph-volume/ceph_volume/tests/devices/lvm/test_batch.py
src/pybind/mgr/cephadm/services/osd.py

index 7bf774dbbad5c059533fcdd4299a7381d3a36b24..af29801f46ed00a66a2b839d7cdb7c319e929728 100644 (file)
@@ -447,8 +447,8 @@ class Batch(object):
                                                  num_osds,
                                                  fast_type)
         if fast_devices and not fast_allocations:
-            mlogger.info('{} fast devices were passed, but none are available'.format(len(fast_devices)))
-            return []
+            mlogger.error('{} fast devices were passed, but none are available'.format(len(fast_devices)))
+            exit(1)
         if fast_devices and not len(fast_allocations) == num_osds:
             mlogger.error('{} fast allocations != {} num_osds'.format(
                 len(fast_allocations), num_osds))
@@ -459,8 +459,8 @@ class Batch(object):
                                                       num_osds,
                                                       'block_wal')
         if very_fast_devices and not very_fast_allocations:
-            mlogger.info('{} very fast devices were passed, but none are available'.format(len(very_fast_devices)))
-            return []
+            mlogger.error('{} very fast devices were passed, but none are available'.format(len(very_fast_devices)))
+            exit(1)
         if very_fast_devices and not len(very_fast_allocations) == num_osds:
             mlogger.error('{} very fast allocations != {} num_osds'.format(
                 len(very_fast_allocations), num_osds))
index 79c9f7122743c1d8a2d8e20637ca992faee7de1e..0300cb772d4c20f890e367e7d427aa6d4bfcf174 100644 (file)
@@ -43,73 +43,73 @@ class TestBatch(object):
         with pytest.raises(ArgumentError):
             arg_validators.ValidBatchDevice()('foo')
 
-    @pytest.mark.parametrize('format_', ['pretty', 'json', 'json-pretty'])
-    def test_report(self, format_, factory, conf_ceph_stub, mock_device_generator):
-        # just ensure reporting works
+    def test_exit_on_unavailable_fast_allocation(self, factory, conf_ceph_stub, mock_device_generator):
         conf_ceph_stub('[global]\nfsid=asdf-lkjh')
         devs = [mock_device_generator() for _ in range(5)]
+        fast_devs = [mock_device_generator()]
+        fast_devs[0].available_lvm = False
         args = factory(data_slots=1,
                        osds_per_device=1,
                        osd_ids=[],
-                       report=True,
-                       format=format_,
                        devices=devs,
-                       db_devices=[],
+                       db_devices=fast_devs,
                        wal_devices=[],
                        objectstore='bluestore',
-                       block_db_size=disk.Size(gb=1),
-                       block_db_slots=1,
+                       block_db_size="1G",
+                       block_db_slots=1.0,
                        dmcrypt=True,
                        data_allocate_fraction=1.0,
                        has_block_db_size_without_db_devices=None
                       )
         b = batch.Batch([])
         b.args = args
-        plan = b.get_deployment_layout()
-        b.report(plan)
+        with pytest.raises(SystemExit) as err:
+            b.get_deployment_layout()
+        assert err.value.code == 1
 
-    @pytest.mark.parametrize('format_', ['json', 'json-pretty'])
-    def test_json_report_valid_empty(self, format_, factory, conf_ceph_stub, mock_device_generator):
+    def test_exit_on_unavailable_very_fast_allocation(self, factory, conf_ceph_stub, mock_device_generator):
         # ensure json reports are valid when empty
         conf_ceph_stub('[global]\nfsid=asdf-lkjh')
-        devs = []
+        devs = [mock_device_generator() for _ in range(5)]
+        fast_devs = [mock_device_generator()]
+        fast_devs[0].available_lvm = False
+        very_fast_devs = [mock_device_generator()]
+        very_fast_devs[0].available_lvm = False
         args = factory(data_slots=1,
                        osds_per_device=1,
                        osd_ids=[],
-                       report=True,
-                       format=format_,
                        devices=devs,
-                       db_devices=[],
-                       wal_devices=[],
+                       db_devices=fast_devs,
+                       wal_devices=very_fast_devs,
                        objectstore='bluestore',
                        block_db_size="1G",
+                       block_db_slots=5,
                        dmcrypt=True,
                        data_allocate_fraction=1.0,
+                       has_block_db_size_without_db_devices=None
                       )
         b = batch.Batch([])
         b.args = args
-        plan = b.get_deployment_layout()
-        report = b._create_report(plan)
-        json.loads(report)
+        with pytest.raises(SystemExit) as err:
+            b.get_deployment_layout()
+        assert err.value.code == 1
 
-    @pytest.mark.parametrize('format_', ['json', 'json-pretty'])
-    def test_json_report_valid_empty_unavailable_fast(self, format_, factory, conf_ceph_stub, mock_device_generator):
-        # ensure json reports are valid when empty
+    @pytest.mark.parametrize('format_', ['pretty', 'json', 'json-pretty'])
+    def test_report(self, format_, factory, conf_ceph_stub, mock_device_generator):
+        # just ensure reporting works
         conf_ceph_stub('[global]\nfsid=asdf-lkjh')
         devs = [mock_device_generator() for _ in range(5)]
-        fast_devs = [mock_device_generator()]
-        fast_devs[0].available_lvm = False
         args = factory(data_slots=1,
                        osds_per_device=1,
                        osd_ids=[],
                        report=True,
                        format=format_,
                        devices=devs,
-                       db_devices=fast_devs,
+                       db_devices=[],
                        wal_devices=[],
                        objectstore='bluestore',
-                       block_db_size="1G",
-                       block_db_slots=1.0,
+                       block_db_size=disk.Size(gb=1),
+                       block_db_slots=1,
                        dmcrypt=True,
                        data_allocate_fraction=1.0,
                        has_block_db_size_without_db_devices=None
@@ -117,33 +117,25 @@ class TestBatch(object):
         b = batch.Batch([])
         b.args = args
         plan = b.get_deployment_layout()
-        report = b._create_report(plan)
-        json.loads(report)
-
+        b.report(plan)
 
     @pytest.mark.parametrize('format_', ['json', 'json-pretty'])
-    def test_json_report_valid_empty_unavailable_very_fast(self, format_, factory, conf_ceph_stub, mock_device_generator):
+    def test_json_report_valid_empty(self, format_, factory, conf_ceph_stub, mock_device_generator):
         # ensure json reports are valid when empty
         conf_ceph_stub('[global]\nfsid=asdf-lkjh')
-        devs = [mock_device_generator() for _ in range(5)]
-        fast_devs = [mock_device_generator()]
-        fast_devs[0].available_lvm = False
-        very_fast_devs = [mock_device_generator()]
-        very_fast_devs[0].available_lvm = False
+        devs = []
         args = factory(data_slots=1,
                        osds_per_device=1,
                        osd_ids=[],
                        report=True,
                        format=format_,
                        devices=devs,
-                       db_devices=fast_devs,
-                       wal_devices=very_fast_devs,
+                       db_devices=[],
+                       wal_devices=[],
                        objectstore='bluestore',
                        block_db_size="1G",
-                       block_db_slots=5,
                        dmcrypt=True,
                        data_allocate_fraction=1.0,
-                       has_block_db_size_without_db_devices=None
                       )
         b = batch.Batch([])
         b.args = args
index 855dc6286ef133edb37db491fd144cea1c77fb0b..f0fd3c15ef0a460deaa5b59b61983dc2490e1388 100644 (file)
@@ -72,14 +72,18 @@ class OSDService(CephService):
             self.mgr.cache.save_host(host)
             return ret_msg
 
-        async def all_hosts() -> List[Optional[str]]:
+        async def all_hosts() -> List[str]:
             futures = [create_from_spec_one(h, ds)
                        for h, ds in self.prepare_drivegroup(drive_group)]
-            return await gather(*futures)
+            results = await gather(*futures, return_exceptions=True)
+            for result in results:
+                if isinstance(result, Exception):
+                    self.mgr.log.error(f'Failed to create OSD: {result}')
+            return [result for result in results if isinstance(result, str)]
 
         with self.mgr.async_timeout_handler('cephadm deploy (osd daemon)'):
             ret = self.mgr.wait_async(all_hosts())
-        return ", ".join(filter(None, ret))
+        return ", ".join(ret)
 
     async def create_single_host(self,
                                  drive_group: DriveGroupSpec,