ceph-volume: fix bug with miscalculation of required db/wal slot size for VGs with...
author Cory Snyder <csnyder@iland.com>
Fri, 24 Sep 2021 15:56:09 +0000 (11:56 -0400)
committer Cory Snyder <csnyder@iland.com>
Mon, 8 Nov 2021 15:51:56 +0000 (10:51 -0500)
Previous logic for calculating db/wal slot sizes assumed that each db/wal VG would be backed by
only a single PV. This wasn't the case for OSDs deployed prior to v15.2.8, since ceph-volume
previously placed multiple SSDs in the same VG. This fix removes that assumption and performs the
correct calculation in either case.

Fixes: https://tracker.ceph.com/issues/52730
Signed-off-by: Cory Snyder <csnyder@iland.com>
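
To make the arithmetic concrete: dev.vg_size[0] reflects the size of the whole VG, so dividing it
by the per-device slot count overestimates the slot size whenever the VG spans several PVs. A
minimal sketch of the calculation, with hypothetical sizes (not ceph-volume internals):

    # Illustrative sketch only; sizes and function names are hypothetical.
    GiB = 1024 ** 3

    def slot_size_old(vg_size_bytes, slots_per_device):
        # pre-fix: treats the VG as if it were backed by a single device
        return vg_size_bytes // slots_per_device

    def slot_size_new(vg_size_bytes, slots_per_device, devices_in_vg):
        # post-fix: scale the slot count by the number of devices backing the VG
        return vg_size_bytes // (slots_per_device * devices_in_vg)

    # A db VG created before v15.2.8 from two 400 GiB SSDs, two db slots per SSD:
    vg_size = 800 * GiB
    print(slot_size_old(vg_size, 2) // GiB)      # 400 -- twice the intended slot size
    print(slot_size_new(vg_size, 2, 2) // GiB)   # 200 -- correct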
src/ceph-volume/ceph_volume/devices/lvm/batch.py
src/ceph-volume/ceph_volume/tests/conftest.py

index 0faa88ec0a88e72037ae93a3adf83311554e6721..0dfd809bdf3bd93ff21491e423ef475858523e90 100644 (file)
@@ -112,35 +112,55 @@ def get_physical_fast_allocs(devices, type_, fast_slots_per_device, new_osds, ar
         requested_size = get_size_fct(lv_format=False)
 
     ret = []
-    for dev in devices:
-        if not dev.available_lvm:
-            continue
-        # any LV present is considered a taken slot
-        occupied_slots = len(dev.lvs)
-        # this only looks at the first vg on device, unsure if there is a better
-        # way
-        dev_size = dev.vg_size[0]
-        abs_size = disk.Size(b=int(dev_size / requested_slots))
-        free_size = dev.vg_free[0]
-        relative_size = int(abs_size) / dev_size
-        if requested_size:
-            if requested_size <= abs_size:
-                abs_size = requested_size
-                relative_size = int(abs_size) / dev_size
-            else:
-                mlogger.error(
-                    '{} was requested for {}, but only {} can be fulfilled'.format(
-                        requested_size,
-                        '{}_size'.format(type_),
-                        abs_size,
-                    ))
-                exit(1)
-        while abs_size <= free_size and len(ret) < new_osds and occupied_slots < fast_slots_per_device:
-            free_size -= abs_size.b
-            occupied_slots += 1
-            ret.append((dev.path, relative_size, abs_size, requested_slots))
+    vg_device_map = group_devices_by_vg(devices)
+    for vg_devices in vg_device_map.values():
+        for dev in vg_devices:
+            if not dev.available_lvm:
+                continue
+            # any LV present is considered a taken slot
+            occupied_slots = len(dev.lvs)
+            # prior to v15.2.8, db/wal deployments were grouping multiple fast devices into single VGs - we need to
+            # multiply requested_slots (per device) by the number of devices in the VG in order to ensure that
+            # abs_size is calculated correctly from vg_size
+            slots_for_vg = len(vg_devices) * requested_slots
+            dev_size = dev.vg_size[0]
+            # this only looks at the first vg on device, unsure if there is a better
+            # way
+            abs_size = disk.Size(b=int(dev_size / slots_for_vg))
+            free_size = dev.vg_free[0]
+            relative_size = int(abs_size) / dev_size
+            if requested_size:
+                if requested_size <= abs_size:
+                    abs_size = requested_size
+                    relative_size = int(abs_size) / dev_size
+                else:
+                    mlogger.error(
+                        '{} was requested for {}, but only {} can be fulfilled'.format(
+                            requested_size,
+                            '{}_size'.format(type_),
+                            abs_size,
+                        ))
+                    exit(1)
+            while abs_size <= free_size and len(ret) < new_osds and occupied_slots < fast_slots_per_device:
+                free_size -= abs_size.b
+                occupied_slots += 1
+                ret.append((dev.path, relative_size, abs_size, requested_slots))
     return ret
 
+def group_devices_by_vg(devices):
+    result = dict()
+    result['unused_devices'] = []
+    for dev in devices:
+        if len(dev.vgs) > 0:
+            # already using assumption that a PV only belongs to single VG in other places
+            vg_name = dev.vgs[0].name
+            if vg_name in result:
+                result[vg_name].append(dev)
+            else:
+                result[vg_name] = [dev]
+        else:
+            result['unused_devices'].append(dev)
+    return result
 
 def get_lvm_fast_allocs(lvs):
     return [("{}/{}".format(d.vg_name, d.lv_name), 100.0,
index 357881a53f58f36718120af5c6e61f8893bb5715..c41a46074439766f5d1a7534e8b156531420f931 100644 (file)
@@ -63,6 +63,9 @@ def mock_lv_device_generator():
 def mock_devices_available():
     dev = create_autospec(device.Device)
     dev.path = '/dev/foo'
+    dev.vg_name = 'vg_foo'
+    dev.lv_name = 'lv_foo'
+    dev.vgs = [lvm.VolumeGroup(vg_name=dev.vg_name, lv_name=dev.lv_name)]
     dev.available_lvm = True
     dev.vg_size = [21474836480]
     dev.vg_free = dev.vg_size
@@ -73,6 +76,9 @@ def mock_device_generator():
     def mock_device():
         dev = create_autospec(device.Device)
         dev.path = '/dev/foo'
+        dev.vg_name = 'vg_foo'
+        dev.lv_name = 'lv_foo'
+        dev.vgs = [lvm.VolumeGroup(vg_name=dev.vg_name, lv_name=dev.lv_name)]
         dev.available_lvm = True
         dev.vg_size = [21474836480]
         dev.vg_free = dev.vg_size
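
The new vg_name/lv_name/vgs attributes on the mocked devices make it possible for tests to model
fast devices that share a VG. A hedged sketch of that pattern using only the standard library (the
tests in this tree use create_autospec against ceph_volume's Device class and lvm.VolumeGroup, as
shown above; the device names and sizes below are illustrative):

    from unittest import mock

    def make_fast_device(path, vg_name, vg_total_bytes):
        # Plain MagicMock stand-in mirroring the attributes set in conftest.py;
        # vg_size/vg_free report the size of the whole shared VG, which is what
        # the fixed slot calculation divides by.
        dev = mock.MagicMock()
        dev.path = path
        dev.available_lvm = True
        dev.lvs = []
        dev.vg_name = vg_name
        dev.vg_size = [vg_total_bytes]
        dev.vg_free = list(dev.vg_size)
        vg = mock.MagicMock()
        vg.name = vg_name
        dev.vgs = [vg]
        return dev

    # Two fast devices that a pre-v15.2.8 deployment placed into one 40 GiB db VG:
    devs = [make_fast_device('/dev/sdb', 'ceph-db-vg', 40 * 1024**3),
            make_fast_device('/dev/sdc', 'ceph-db-vg', 40 * 1024**3)]
    assert devs[0].vgs[0].name == devs[1].vgs[0].name   # both report the shared VG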