From: Raimund Sacherer Date: Tue, 12 May 2026 09:05:53 +0000 (+0200) Subject: ceph-volume: tolerate <=1% short-fall on requested db/wal size X-Git-Tag: testing/wip-yuri10-testing-20260526.155424-main~4^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=5f8a4de52a87a7a0aebaf787f86fa922673ba1f9;p=ceph-ci.git ceph-volume: tolerate <=1% short-fall on requested db/wal size When requested_size (e.g. 1 GiB) slightly exceeds abs_size (e.g. 1023.3 MiB after losing space to PE alignment), get_physical_fast_allocs() called exit(1) and aborted the whole batch. If the short-fall is within 1%, scale down to abs_size with an info log instead of aborting. Anything larger still hits the existing error path. Needs review: confirm 1% is the right threshold (maybe a lower percentage is sufficient) and that no caller assumes abs_size == requested_size after this branch. Signed-off-by: Raimund Sacherer --- diff --git a/src/ceph-volume/ceph_volume/devices/lvm/batch.py b/src/ceph-volume/ceph_volume/devices/lvm/batch.py index 62083cba315..062ec731359 100644 --- a/src/ceph-volume/ceph_volume/devices/lvm/batch.py +++ b/src/ceph-volume/ceph_volume/devices/lvm/batch.py @@ -90,6 +90,17 @@ def get_physical_fast_allocs(devices: List[device.Device], type_: str, fast_slot if requested_size <= abs_size: abs_size = requested_size relative_size = int(abs_size) / dev_size + elif (int(requested_size) > 0 and + (int(requested_size) - int(abs_size)) / int(requested_size) <= 0.01): + # Tolerance: if the requested size overshoots what can be + # fulfilled by <= 1% (e.g. 1GiB vs 1023.3MiB lost to PE + # alignment), silently scale down to abs_size instead of + # failing the whole batch. 
+ mlogger.info( + '{} was requested for {}, fulfilling with {} (within 1 percent tolerance)'.format( + requested_size, '{}_size'.format(type_), abs_size, + )) + relative_size = int(abs_size) / dev_size else: mlogger.error( '{} was requested for {}, but only {} can be fulfilled'.format( diff --git a/src/ceph-volume/ceph_volume/tests/devices/lvm/test_batch.py b/src/ceph-volume/ceph_volume/tests/devices/lvm/test_batch.py index adc7c45cd9b..a724b47410a 100644 --- a/src/ceph-volume/ceph_volume/tests/devices/lvm/test_batch.py +++ b/src/ceph-volume/ceph_volume/tests/devices/lvm/test_batch.py @@ -395,6 +395,46 @@ class TestBatch(object): assert len(fast) == 1 assert fast[0][0] == fast_dev.path + def test_get_physical_fast_allocs_tolerance_within_1_percent(self, factory, + conf_ceph_stub, + mock_device_generator): + # When requested_size overshoots the achievable abs_size by <=1% + # (e.g. PE alignment rounding 1 GiB down to ~1023.3 MiB), the + # allocator must scale down to abs_size silently instead of calling + # exit(1). + conf_ceph_stub('[global]\nfsid=asdf-lkjh') + # 20 GiB / 20 slots = 1 GiB abs_size; request 1 GiB + 100 KiB → ~0.01% + vg_size = 21474836480 + fast_dev = mock_device_generator() + fast_dev.vg_size = [vg_size] + fast_dev.vg_free = [vg_size] + requested = disk.Size(b=int(vg_size / 20) + 100 * 1024) + args = factory(block_db_slots=20, block_db_size=requested, + devices=['/dev/data']) + fast = batch.get_physical_fast_allocs([fast_dev], 'block_db', + 20, 1, args) + assert len(fast) == 1 + # abs_size is the achievable size, not the over-requested one + assert fast[0][2] == disk.Size(b=int(vg_size / 20)) + + def test_get_physical_fast_allocs_tolerance_over_1_percent(self, factory, + conf_ceph_stub, + mock_device_generator): + # Over the 1% threshold still aborts via exit(1). 
+ conf_ceph_stub('[global]\nfsid=asdf-lkjh') + vg_size = 21474836480 + fast_dev = mock_device_generator() + fast_dev.vg_size = [vg_size] + fast_dev.vg_free = [vg_size] + # Request 2 GiB on a 1 GiB slot — ~100% overshoot. + requested = disk.Size(b=int(vg_size / 20) * 2) + args = factory(block_db_slots=20, block_db_size=requested, + devices=['/dev/data']) + with pytest.raises(SystemExit) as err: + batch.get_physical_fast_allocs([fast_dev], 'block_db', + 20, 1, args) + assert err.value.code == 1 + def test_get_lvm_osds_return_len(self, factory, mock_lv_device_generator, conf_ceph_stub,