When devices have already been used for OSDs, they are still
allowed to pass filtering as they are still needed for the
resulting ceph-volume lvm batch command. This was causing an
issue with limit however. Limit adds the devices we've found
that match the filter and existing OSD daemons tied to the spec.
This allows double counting of devices that have been used for
OSDs, as they're counted in terms of being an existing device
and that they match the filter. To avoid this issue, devices
should only be counted towards the limit if they are not already
part of an OSD.
An additional note: The limit feature is only applied for
data devices, so there is no need to worry about the effect
of this change on selection of db, wal, or journal devices.
Also, we would still not want to count these devices if they
ended up passing the data device filter but had previously been
used for a db/wal/journal device.
Fixes: https://tracker.ceph.com/issues/63525
Signed-off-by: Adam King <adking@redhat.com>
(cherry picked from commit d3f1a0e1c0b98b9f1251837ecc8edc367e590dad)
logger.exception('Cannot decode JSON: \'%s\'' % ' '.join(out))
concat_out = {}
notes = []
- if osdspec.data_devices is not None and osdspec.data_devices.limit and len(concat_out) < osdspec.data_devices.limit:
+ if (
+ osdspec.data_devices is not None
+ and osdspec.data_devices.limit
+ and (len(concat_out) + ds.existing_daemons) < osdspec.data_devices.limit
+ ):
found = len(concat_out)
limit = osdspec.data_devices.limit
notes.append(
- f'NOTE: Did not find enough disks matching filter on host {host} to reach data device limit (Found: {found} | Limit: {limit})')
+ f'NOTE: Did not find enough disks matching filter on host {host} to reach data device limit\n'
+ f'(New Devices: {found} | Existing Matching Daemons: {ds.existing_daemons} | Limit: {limit})')
ret_all.append({'data': concat_out,
'osdspec': osdspec.service_id,
'host': host,
@mock.patch('cephadm.services.osd.OSDService.driveselection_to_ceph_volume')
@mock.patch('cephadm.services.osd.OsdIdClaims.refresh', lambda _: None)
@mock.patch('cephadm.services.osd.OsdIdClaims.get', lambda _: {})
- def test_limit_not_reached(self, d_to_cv, _run_cv_cmd, cephadm_module):
+ @mock.patch('cephadm.inventory.HostCache.get_daemons_by_service')
+ def test_limit_not_reached(self, _get_daemons_by_service, d_to_cv, _run_cv_cmd, cephadm_module):
with with_host(cephadm_module, 'test'):
dg = DriveGroupSpec(placement=PlacementSpec(host_pattern='test'),
data_devices=DeviceSelection(limit=5, rotational=1),
'[{"data": "/dev/vdb", "data_size": "50.00 GB", "encryption": "None"}, {"data": "/dev/vdc", "data_size": "50.00 GB", "encryption": "None"}]']
d_to_cv.return_value = 'foo'
_run_cv_cmd.side_effect = async_side_effect((disks_found, '', 0))
+ _get_daemons_by_service.return_value = [DaemonDescription(daemon_type='osd', hostname='test', service_name='not_enough')]
preview = cephadm_module.osd_service.generate_previews([dg], 'test')
for osd in preview:
assert 'notes' in osd
assert osd['notes'] == [
- 'NOTE: Did not find enough disks matching filter on host test to reach data device limit (Found: 2 | Limit: 5)']
+ ('NOTE: Did not find enough disks matching filter on host test to reach '
+ 'data device limit\n(New Devices: 2 | Existing Matching Daemons: 1 | Limit: 5)')]
@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
def test_prepare_drivegroup(self, cephadm_module):
# type: () -> List[Device]
return self._journal
- def _limit_reached(self, device_filter, len_devices,
- disk_path):
- # type: (DeviceSelection, int, str) -> bool
+ def _limit_reached(
+ self,
+ device_filter: DeviceSelection,
+ devices: List[Device],
+ disk_path: str
+ ) -> bool:
""" Check for the <limit> property and apply logic
If a limit is set in 'device_attrs' we have to stop adding
If limit is set (>0) and len(devices) >= limit
- :param int len_devices: Length of the already populated device set/list
+ :param List[Device] devices: Already populated device set/list
:param str disk_path: The disk identifier (for logging purposes)
:return: True/False if the device should be added to the list of devices
:rtype: bool
"""
limit = device_filter.limit or 0
-
- if limit > 0 and (len_devices + self.existing_daemons >= limit):
+ # If device A is being used for an OSD already, it can still
+ # match the filter (this is necessary as we still want the
+ # device in the resulting ceph-volume lvm batch command).
+ # If that is the case, we don't want to count the device
+ # towards the limit as it will already be counted through the
+ # existing daemons
+ non_ceph_devices = [d for d in devices if not d.ceph_device]
+
+ if limit > 0 and (len(non_ceph_devices) + self.existing_daemons >= limit):
logger.debug("Refuse to add {} due to limit policy of <{}>".format(
disk_path, limit))
return True
continue
# break on this condition.
- if self._limit_reached(device_filter, len(devices), disk.path):
+ if self._limit_reached(device_filter, devices, disk.path):
logger.debug("Ignoring disk {}. Limit reached".format(
disk.path))
break
inventory = _mk_inventory(_mk_device(rotational=True)*2)
m = 'Failed to validate OSD spec "foobar.data_devices": No filters applied'
with pytest.raises(DriveGroupValidationError, match=m):
- drive_selection.DriveSelection(spec, inventory)
\ No newline at end of file
+ drive_selection.DriveSelection(spec, inventory)
+
+
+class TestDeviceSelectionLimit:
+
+    def test_limit_existing_devices(self):
+        """Limit must not double count devices already used for OSDs.
+
+        Regression test for https://tracker.ceph.com/issues/63525:
+        a device that already backs an OSD matches the filter AND is
+        counted via existing_daemons, so it must not also count as a
+        "new" device toward the limit.
+        """
+        # Initial setup for this test is meant to be that /dev/sda
+        # is already being used for an OSD, hence it being marked
+        # as a ceph_device. /dev/sdb and /dev/sdc are not being used
+        # for OSDs yet. The limit will be set to 2 and the DriveSelection
+        # is set to have 1 pre-existing device (corresponding to /dev/sda)
+        dev_a = Device('/dev/sda', ceph_device=True, available=False)
+        dev_b = Device('/dev/sdb', ceph_device=False, available=True)
+        dev_c = Device('/dev/sdc', ceph_device=False, available=True)
+        all_devices: List[Device] = [dev_a, dev_b, dev_c]
+        processed_devices: List[Device] = []
+        # NOTE(review): 'filter' shadows the builtin of the same name;
+        # harmless in this scope but consider renaming (e.g. 'dev_filter').
+        filter = DeviceSelection(all=True, limit=2)
+        dgs = DriveGroupSpec(data_devices=filter)
+        ds = drive_selection.DriveSelection(dgs, all_devices, existing_daemons=1)
+
+        # Check /dev/sda. It's already being used for an OSD and will
+        # be counted in existing_daemons. This check should return False
+        # as we are not over the limit.
+        assert not ds._limit_reached(filter, processed_devices, '/dev/sda')
+        processed_devices.append(dev_a)
+
+        # We should still not be over the limit here with /dev/sdb since
+        # we will have only one pre-existing daemon /dev/sdb itself. This
+        # case previously failed as devices that contributed to existing_daemons
+        # would be double counted both as devices and daemons.
+        assert not ds._limit_reached(filter, processed_devices, '/dev/sdb')
+        processed_devices.append(dev_b)
+
+        # Now, with /dev/sdb and the pre-existing daemon taking up the 2
+        # slots, /dev/sdc should be rejected for us being over the limit.
+        assert ds._limit_reached(filter, processed_devices, '/dev/sdc')
+
+        # DriveSelection does device assignment on initialization. Let's check
+        # it picked up the expected devices
+        assert ds._data == [dev_a, dev_b]