git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
ceph-volume: allocate db/wal slot on partial fast-device VG
author: Raimund Sacherer <rsachere@redhat.com>
Tue, 12 May 2026 09:05:26 +0000 (11:05 +0200)
committer: Raimund Sacherer <rsachere@redhat.com>
Tue, 12 May 2026 14:35:08 +0000 (16:35 +0200)
On single-OSD redeploy where the fast device VG already has
DB LVs for sibling OSDs, get_physical_fast_allocs() returned an empty
list and ceph-volume fell back to a co-located OSD.

Two fixes in get_physical_fast_allocs():

- abs_size = dev_size / slots_for_vg can exceed vg_free when other
  slots are still in use, so the while-loop is never entered. Fall
  back to abs_size = free_size / fast_slots_per_device in that case.

- The loop counter was occupied_slots = len(dev.lvs), so on a partial
  VG the loop was aborted prematurely. Count only the slots
  allocated in this call (new_slots) instead, and separately cap the
  total occupancy (occupied_slots + new_slots) at requested_slots.

The initial issue was the silent creation of OSDs without a DB, which
was fixed in commit 5c700ed7d64. After applying that fix, we
did not get any OSDs deployed at all.

Tested on an RHCS 8 lab cluster (12 HDDs / 4 SSDs across 3 hosts,
db_slots: 6, encrypted).

Needs review: confirm that new_slots matches the original intent
of the per-batch cap when multiple OSDs are deployed in one call.

Fixes: https://tracker.ceph.com/issues/76522
Signed-off-by: Raimund Sacherer <rsachere@redhat.com>
src/ceph-volume/ceph_volume/devices/lvm/batch.py
src/ceph-volume/ceph_volume/tests/devices/lvm/test_batch.py

index af29801f46ed00a66a2b839d7cdb7c319e929728..62083cba3154ebbe85128022692f7353de1921ff 100644 (file)
@@ -77,6 +77,14 @@ def get_physical_fast_allocs(devices: List[device.Device], type_: str, fast_slot
             # way
             abs_size = disk.Size(b=int(dev_size / slots_for_vg))
             free_size = dev.vg_free[0]
+            # When a fast device VG is partially used (e.g. one OSD's DB/WAL
+            # is still live while its partner is being replaced), slots_for_vg
+            # can undercount the true slot capacity, making abs_size larger
+            # than free_size so the while loop never fires.  Fall back to
+            # dividing the actual free space by the number of slots we still
+            # want to allocate.
+            if abs_size > free_size and fast_slots_per_device > 0:
+                abs_size = disk.Size(b=int(free_size / fast_slots_per_device))
             relative_size = int(abs_size) / dev_size
             if requested_size:
                 if requested_size <= abs_size:
@@ -90,9 +98,17 @@ def get_physical_fast_allocs(devices: List[device.Device], type_: str, fast_slot
                             abs_size,
                         ))
                     exit(1)
-            while abs_size <= free_size and len(ret) < new_osds and occupied_slots < fast_slots_per_device:
+            # Track new allocations separately from pre-existing ones.
+            # fast_slots_per_device caps how many *new* slots this batch may
+            # add per device (distribution); requested_slots caps the *total*
+            # occupancy a device may carry (the spec's db_slots).
+            new_slots = 0
+            while (abs_size <= free_size
+                   and len(ret) < new_osds
+                   and new_slots < fast_slots_per_device
+                   and occupied_slots + new_slots < requested_slots):
                 free_size -= abs_size.b
-                occupied_slots += 1
+                new_slots += 1
                 ret.append((dev.path, relative_size, abs_size, requested_slots))
     return ret
 
index 0300cb772d4c20f890e367e7d427aa6d4bfcf174..adc7c45cd9b733ad77330f8473f0220af6ffbe0b 100644 (file)
@@ -314,6 +314,33 @@ class TestBatch(object):
         db_device = [mock_device_generator()]
         fast = b.fast_allocations(db_device, 1, 1, 'block_db')
         assert len(fast) == 1
+        # Layout: the allocation must reference the fast device, not the
+        # data device, with a non-trivial slot size.
+        assert fast[0][0] == db_device[0].path
+        assert int(fast[0][2]) > 0
+
+    def test_batch_fast_allocations_one_block_db_partial_vg(self,
+                                                            factory, conf_ceph_stub,
+                                                            mock_device_generator):
+        # Single-OSD redeploy at the Batch.fast_allocations() level (the
+        # one-call-up integration of get_physical_fast_allocs that exercises
+        # fast_slots_per_device recompute). When the fast device's VG already
+        # carries surviving DB LVs from sibling OSDs, fast_allocations must
+        # still produce one allocation on that fast device — not silently
+        # return [], which would let cephadm fall back to a co-located OSD.
+        #
+        # The spec sets db_slots: 6 (the per-device occupancy cap); only one
+        # OSD is being deployed in this batch, so fast_slots_per_device gets
+        # recomputed to 1, and the fast device already has 5 sibling LVs.
+        conf_ceph_stub('[global]\nfsid=asdf-lkjh')
+
+        b = batch.Batch([])
+        b.args.block_db_slots = 6
+        db_device = [mock_device_generator(number_lvs=5)]
+        fast = b.fast_allocations(db_device, 1, 1, 'block_db')
+        assert len(fast) == 1
+        assert fast[0][0] == db_device[0].path
+        assert int(fast[0][2]) > 0
 
     @pytest.mark.parametrize('occupied_prior', range(7))
     @pytest.mark.parametrize('slots,num_devs',
@@ -348,6 +375,26 @@ class TestBatch(object):
         assert len([f for f in fast if f[0] == '/dev/bar']) == expected_assignment_on_used_devices
         assert len([f for f in fast if f[0] != '/dev/bar']) == expected_num_osds - expected_assignment_on_used_devices
 
+    def test_get_physical_fast_allocs_redeploy_partial_vg(self, factory,
+                                                          conf_ceph_stub,
+                                                          mock_device_generator):
+        # Single-OSD redeploy where the fast-device VG already hosts
+        # surviving DB LVs for sibling OSDs must still produce one allocation.
+        # Reproducer: db_slots=6 in the spec, 5 LVs already on the fast
+        # device, one new OSD being deployed in this batch, so
+        # Batch.fast_allocations() recomputes fast_slots_per_device down to 1.
+        # With the original `occupied_slots < fast_slots_per_device` loop
+        # guard, occupied_slots==5 >= 1 short-circuited the loop and
+        # get_physical_fast_allocs() returned an empty list.
+        conf_ceph_stub('[global]\nfsid=asdf-lkjh')
+        fast_dev = mock_device_generator(number_lvs=5)
+        args = factory(block_db_slots=6, block_db_size=None,
+                       devices=['/dev/data'])
+        fast = batch.get_physical_fast_allocs([fast_dev], 'block_db',
+                                              1, 1, args)
+        assert len(fast) == 1
+        assert fast[0][0] == fast_dev.path
+
     def test_get_lvm_osds_return_len(self, factory,
                                      mock_lv_device_generator,
                                      conf_ceph_stub,