From: Raimund Sacherer Date: Tue, 12 May 2026 09:05:26 +0000 (+0200) Subject: ceph-volume: allocate db/wal slot on partial fast-device VG X-Git-Tag: v21.0.1~146^2~2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=5a52afc701e676c39b0f79172653cf47e57cc441;p=ceph.git ceph-volume: allocate db/wal slot on partial fast-device VG On single-OSD redeploy where the fast device VG already has DB LVs for sibling OSDs, get_physical_fast_allocs() returned an empty list and ceph-volume fell back to a co-located OSD. Two fixes in get_physical_fast_allocs(): - abs_size = dev_size / slots_for_vg can exceed vg_free when other slots are still in use, so the while-loop is never entered. Fall back to abs_size = free_size / fast_slots_per_device. - The loop counter was occupied_slots = len(dev.lvs), so on a partial VG the loop was aborted prematurely. Count only slots allocated in this call (new_slots) instead. The initial issue was the silent creation of an OSD without DB, which was fixed in commit 5c700ed7d64. After applying that fix we did not get OSDs deployed at all. Tested on RHCS 8 lab cluster (12 HDDs / 4 SSDs across 3 hosts, db_slots: 6, encrypted) Needs review: confirm new_slots matches the original intent of the per-batch cap when multiple OSDs are deployed in one call. Fixes: https://tracker.ceph.com/issues/76522 Signed-off-by: Raimund Sacherer --- diff --git a/src/ceph-volume/ceph_volume/devices/lvm/batch.py b/src/ceph-volume/ceph_volume/devices/lvm/batch.py index af29801f46e..62083cba315 100644 --- a/src/ceph-volume/ceph_volume/devices/lvm/batch.py +++ b/src/ceph-volume/ceph_volume/devices/lvm/batch.py @@ -77,6 +77,14 @@ def get_physical_fast_allocs(devices: List[device.Device], type_: str, fast_slot # way abs_size = disk.Size(b=int(dev_size / slots_for_vg)) free_size = dev.vg_free[0] + # When a fast device VG is partially used (e.g. 
one OSD's DB/WAL + # is still live while its partner is being replaced), slots_for_vg + # can undercount the true slot capacity, making abs_size larger + # than free_size so the while loop never fires. Fall back to + # dividing the actual free space by the number of slots we still + # want to allocate. + if abs_size > free_size and fast_slots_per_device > 0: + abs_size = disk.Size(b=int(free_size / fast_slots_per_device)) relative_size = int(abs_size) / dev_size if requested_size: if requested_size <= abs_size: @@ -90,9 +98,17 @@ def get_physical_fast_allocs(devices: List[device.Device], type_: str, fast_slot abs_size, )) exit(1) - while abs_size <= free_size and len(ret) < new_osds and occupied_slots < fast_slots_per_device: + # Track new allocations separately from pre-existing ones. + # fast_slots_per_device caps how many *new* slots this batch may + # add per device (distribution); requested_slots caps the *total* + # occupancy a device may carry (the spec's db_slots). + new_slots = 0 + while (abs_size <= free_size + and len(ret) < new_osds + and new_slots < fast_slots_per_device + and occupied_slots + new_slots < requested_slots): free_size -= abs_size.b - occupied_slots += 1 + new_slots += 1 ret.append((dev.path, relative_size, abs_size, requested_slots)) return ret diff --git a/src/ceph-volume/ceph_volume/tests/devices/lvm/test_batch.py b/src/ceph-volume/ceph_volume/tests/devices/lvm/test_batch.py index 0300cb772d4..adc7c45cd9b 100644 --- a/src/ceph-volume/ceph_volume/tests/devices/lvm/test_batch.py +++ b/src/ceph-volume/ceph_volume/tests/devices/lvm/test_batch.py @@ -314,6 +314,33 @@ class TestBatch(object): db_device = [mock_device_generator()] fast = b.fast_allocations(db_device, 1, 1, 'block_db') assert len(fast) == 1 + # Layout: the allocation must reference the fast device, not the + # data device, with a non-trivial slot size. 
+ assert fast[0][0] == db_device[0].path + assert int(fast[0][2]) > 0 + + def test_batch_fast_allocations_one_block_db_partial_vg(self, + factory, conf_ceph_stub, + mock_device_generator): + # Single-OSD redeploy at the Batch.fast_allocations() level (the + # one-call-up integration of get_physical_fast_allocs that exercises + # fast_slots_per_device recompute). When the fast device's VG already + # carries surviving DB LVs from sibling OSDs, fast_allocations must + # still produce one allocation on that fast device — not silently + # return [], which would let cephadm fall back to a co-located OSD. + # + # The spec sets db_slots: 6 (the per-device occupancy cap); only one + # OSD is being deployed in this batch, so fast_slots_per_device gets + # recomputed to 1, and the fast device already has 5 sibling LVs. + conf_ceph_stub('[global]\nfsid=asdf-lkjh') + + b = batch.Batch([]) + b.args.block_db_slots = 6 + db_device = [mock_device_generator(number_lvs=5)] + fast = b.fast_allocations(db_device, 1, 1, 'block_db') + assert len(fast) == 1 + assert fast[0][0] == db_device[0].path + assert int(fast[0][2]) > 0 @pytest.mark.parametrize('occupied_prior', range(7)) @pytest.mark.parametrize('slots,num_devs', @@ -348,6 +375,26 @@ class TestBatch(object): assert len([f for f in fast if f[0] == '/dev/bar']) == expected_assignment_on_used_devices assert len([f for f in fast if f[0] != '/dev/bar']) == expected_num_osds - expected_assignment_on_used_devices + def test_get_physical_fast_allocs_redeploy_partial_vg(self, factory, + conf_ceph_stub, + mock_device_generator): + # Single-OSD redeploy where the fast-device VG already hosts + # surviving DB LVs for sibling OSDs must still produce one allocation. + # Reproducer: db_slots=6 in the spec, 5 LVs already on the fast + # device, one new OSD being deployed in this batch, so + # Batch.fast_allocations() recomputes fast_slots_per_device down to 1. 
+ # With the original `occupied_slots < fast_slots_per_device` loop + # guard, occupied_slots==5 >= 1 short-circuited the loop and + # get_physical_fast_allocs() returned an empty list. + conf_ceph_stub('[global]\nfsid=asdf-lkjh') + fast_dev = mock_device_generator(number_lvs=5) + args = factory(block_db_slots=6, block_db_size=None, + devices=['/dev/data']) + fast = batch.get_physical_fast_allocs([fast_dev], 'block_db', + 1, 1, args) + assert len(fast) == 1 + assert fast[0][0] == fast_dev.path + def test_get_lvm_osds_return_len(self, factory, mock_lv_device_generator, conf_ceph_stub,