From 302b08aa26e4634c07985f5c6e17dedb11c089c1 Mon Sep 17 00:00:00 2001 From: Francesco Pantano Date: Fri, 23 Dec 2022 14:45:40 +0100 Subject: [PATCH] Add per OSD crush_device_class definition This patch introduces a per osd crush_device_class definition in the DriveGroup spec. The Device object is extended to support a crush_device_class parameter which is processed by ceph-volume when drives are prepared in batch mode. According to the per osd defined crush device classes, drives are collected and grouped in a dict that is used to produce a set of ceph-volume commands that eventually apply (if defined) the right device class. The test_drive_group unit tests are also extended to make sure we're not breaking compatibility with the default definition and the new syntax is validated, raising an exception if it's violated. Fixes: https://tracker.ceph.com/issues/58184 Signed-off-by: Francesco Pantano (cherry picked from commit 6c6cb2f5130dbcf8e42cf03666173948411fc92b) Conflicts: src/python-common/ceph/deployment/drive_group.py --- doc/cephadm/services/osd.rst | 51 ++++ .../ceph/deployment/drive_group.py | 20 +- .../ceph/deployment/inventory.py | 9 +- .../ceph/deployment/translate.py | 196 ++++++++++---- .../ceph/tests/test_drive_group.py | 250 +++++++++++++++++- 5 files changed, 470 insertions(+), 56 deletions(-) diff --git a/doc/cephadm/services/osd.rst b/doc/cephadm/services/osd.rst index 8400d8336ecf9..d9554006c1a69 100644 --- a/doc/cephadm/services/osd.rst +++ b/doc/cephadm/services/osd.rst @@ -914,6 +914,57 @@ It is also possible to specify directly device paths in specific hosts like the This can easily be done with other filters, like `size` or `vendor` as well. +It's possible to specify the `crush_device_class` parameter within the +DriveGroup spec, and it's applied to all the devices defined by the `paths` +keyword: + +.. code-block:: yaml + + service_type: osd + service_id: osd_using_paths + placement: + hosts: + - Node01 + - Node02 + crush_device_class: ssd + spec: + data_devices: + paths: + - /dev/sdb + - /dev/sdc + db_devices: + paths: + - /dev/sdd + wal_devices: + paths: + - /dev/sde + +The `crush_device_class` parameter, however, can be defined for each OSD passed +using the `paths` keyword with the following syntax: + +.. code-block:: yaml + + service_type: osd + service_id: osd_using_paths + placement: + hosts: + - Node01 + - Node02 + crush_device_class: ssd + spec: + data_devices: + paths: + - path: /dev/sdb + crush_device_class: ssd + - path: /dev/sdc + crush_device_class: nvme + db_devices: + paths: + - /dev/sdd + wal_devices: + paths: + - /dev/sde + .. _cephadm-osd-activate: Activate existing OSDs diff --git a/src/python-common/ceph/deployment/drive_group.py b/src/python-common/ceph/deployment/drive_group.py index 54d8cc27d55ef..b8397ca2789d7 100644 --- a/src/python-common/ceph/deployment/drive_group.py +++ b/src/python-common/ceph/deployment/drive_group.py @@ -30,7 +30,7 @@ class DeviceSelection(object): ] def __init__(self, - paths=None, # type: Optional[List[str]] + paths=None, # type: Optional[List[Dict[str, str]]] model=None, # type: Optional[str] size=None, # type: Optional[str] rotational=None, # type: Optional[bool] @@ -42,7 +42,16 @@ class DeviceSelection(object): ephemeral drive group device specification """ #: List of Device objects for devices paths. 
- self.paths = [] if paths is None else [Device(path) for path in paths] # type: List[Device] + + self.paths = [] + + if paths is not None: + for device in paths: + if isinstance(device, dict): + path: str = device.get("path", '') + self.paths.append(Device(path, crush_device_class=device.get("crush_device_class", None))) # noqa E501 + else: + self.paths.append(Device(str(device))) #: A wildcard string. e.g: "SDD*" or "SanDisk SD8SN8U5" self.model = model @@ -178,9 +187,9 @@ class DriveGroupSpec(ServiceSpec): extra_entrypoint_args: Optional[List[str]] = None, data_allocate_fraction=None, # type: Optional[float] method=None, # type: Optional[OSDMethod] - crush_device_class=None, # type: Optional[str] config=None, # type: Optional[Dict[str, str]] custom_configs=None, # type: Optional[List[CustomConfig]] + crush_device_class=None, # type: Optional[str] ): assert service_type is None or service_type == 'osd' super(DriveGroupSpec, self).__init__('osd', service_id=service_id, @@ -355,5 +364,10 @@ class DriveGroupSpec(ServiceSpec): self.service_id, 'method raw only supports bluestore') + if self.data_devices.paths is not None: + for device in list(self.data_devices.paths): + if not device.path: + raise DriveGroupValidationError(self.service_id, 'Device path cannot be empty') # noqa E501 + yaml.add_representer(DriveGroupSpec, DriveGroupSpec.yaml_representer) diff --git a/src/python-common/ceph/deployment/inventory.py b/src/python-common/ceph/deployment/inventory.py index 4345597b11d09..c57b469f8ca70 100644 --- a/src/python-common/ceph/deployment/inventory.py +++ b/src/python-common/ceph/deployment/inventory.py @@ -54,6 +54,7 @@ class Device(object): 'human_readable_type', 'device_id', 'lsm_data', + 'crush_device_class' ] def __init__(self, @@ -65,8 +66,10 @@ class Device(object): device_id=None, # type: Optional[str] lsm_data=None, # type: Optional[Dict[str, Dict[str, str]]] created=None, # type: Optional[datetime.datetime] - ceph_device=None # type: Optional[bool] + ceph_device=None, # type: Optional[bool] + crush_device_class=None # type: Optional[str] ): + self.path = path self.sys_api = sys_api if sys_api is not None else {} # type: Dict[str, Any] self.available = available @@ -76,6 +79,7 @@ class Device(object): self.lsm_data = lsm_data if lsm_data is not None else {} # type: Dict[str, Dict[str, str]] self.created = created if created is not None else datetime_now() self.ceph_device = ceph_device + self.crush_device_class = crush_device_class def __eq__(self, other): # type: (Any) -> bool @@ -124,7 +128,8 @@ class Device(object): 'path': self.path if self.path is not None else 'unknown', 'lvs': self.lvs if self.lvs else 'None', 'available': str(self.available), - 'ceph_device': str(self.ceph_device) + 'ceph_device': str(self.ceph_device), + 'crush_device_class': str(self.crush_device_class) } if not self.available and self.rejected_reasons: device_desc['rejection reasons'] = self.rejected_reasons diff --git a/src/python-common/ceph/deployment/translate.py b/src/python-common/ceph/deployment/translate.py index 2d373732c04f1..0f404af305b10 100644 --- a/src/python-common/ceph/deployment/translate.py +++ b/src/python-common/ceph/deployment/translate.py @@ -1,7 +1,7 @@ import logging try: - from typing import Optional, List + from typing import Optional, List, Dict except ImportError: pass @@ -13,6 +13,10 @@ logger = logging.getLogger(__name__) # TODO refactor this to a DriveSelection method class to_ceph_volume(object): + _supported_device_classes = [ + "hdd", "ssd", "nvme" + ] + def 
__init__(self, selection, # type: DriveSelection osd_id_claims=None, # type: Optional[List[str]] @@ -24,73 +28,161 @@ class to_ceph_volume(object): self.preview = preview self.osd_id_claims = osd_id_claims + def prepare_devices(self): + + # type: () -> Dict[str, List[str]] + + lvcount: Dict[str, List[str]] = dict() + + """ + Default entry for the global crush_device_class definition; + if there's no global definition at spec level, we do not want + to apply anything to the provided devices, hence we need to run + a ceph-volume command without that option, otherwise we init an + entry for the globally defined crush_device_class. + """ + if self.spec.crush_device_class: + lvcount[self.spec.crush_device_class] = [] + + # entry where the drives that don't require a crush_device_class + # option are collected + lvcount["no_crush"] = [] + + """ + for each device, check if it's just a path or it has a crush_device + class definition, and append an entry to the right crush_device_ + class group + """ + for device in self.selection.data_devices(): + # iterate on List[Device], containing both path and + # crush_device_class + path = device.path + crush_device_class = device.crush_device_class + + if path is None: + raise ValueError("Device path can't be empty") + + """ + if crush_device_class is specified for the current Device path + we should either init the array for this group or append the + drive path to the existing entry + """ + if crush_device_class: + if crush_device_class in lvcount.keys(): + lvcount[crush_device_class].append(path) + else: + lvcount[crush_device_class] = [path] + continue + + """ + if no crush_device_class is specified for the current path + but a global definition is present in the spec, so we group + the drives together + """ + if crush_device_class is None and self.spec.crush_device_class: + lvcount[self.spec.crush_device_class].append(path) + continue + else: + # default use case + lvcount["no_crush"].append(path) + continue + + return lvcount + def run(self): # type: () -> List[str] """ Generate ceph-volume commands based on the DriveGroup filters """ - data_devices = [x.path for x in self.selection.data_devices()] + db_devices = [x.path for x in self.selection.db_devices()] wal_devices = [x.path for x in self.selection.wal_devices()] journal_devices = [x.path for x in self.selection.journal_devices()] - if not data_devices: + if not self.selection.data_devices(): return [] cmds: List[str] = [] - if self.spec.method == 'raw': - assert self.spec.objectstore == 'bluestore' - # ceph-volume raw prepare only support 1:1 ratio of data to db/wal devices - if data_devices and db_devices: - if len(data_devices) != len(db_devices): - raise ValueError('Number of data devices must match number of ' - 'db devices for raw mode osds') - if data_devices and wal_devices: - if len(data_devices) != len(wal_devices): - raise ValueError('Number of data devices must match number of ' - 'wal devices for raw mode osds') - # for raw prepare each data device needs its own prepare command - dev_counter = 0 - while dev_counter < len(data_devices): - cmd = "raw prepare --bluestore" - cmd += " --data {}".format(data_devices[dev_counter]) - if db_devices: - cmd += " --block.db {}".format(db_devices[dev_counter]) - if wal_devices: - cmd += " --block.wal {}".format(wal_devices[dev_counter]) - cmds.append(cmd) - dev_counter += 1 - - elif self.spec.objectstore == 'filestore': + + devices = self.prepare_devices() + # get the total number of devices provided by the Dict[str, List[str]] + devices_count 
= len(sum(list(devices.values()), [])) + + if devices and db_devices: + if (devices_count != len(db_devices)) and (self.spec.method == 'raw'): + raise ValueError('Number of data devices must match number of ' + 'db devices for raw mode osds') + + if devices and wal_devices: + if (devices_count != len(wal_devices)) and (self.spec.method == 'raw'): + raise ValueError('Number of data devices must match number of ' + 'wal devices for raw mode osds') + + # For this use case we don't apply any custom crush_device_classes + # Note that filestore is not supported anymore by the DriveGroupSpec + if self.spec.objectstore == 'filestore': # for lvm batch we can just do all devices in one command + devs: List = sum(list(devices.values()), []) + cmd = "lvm batch --no-auto" - cmd += " {}".format(" ".join(data_devices)) + cmd += " {}".format(" ".join(devs)) if self.spec.journal_size: cmd += " --journal-size {}".format(self.spec.journal_size) if journal_devices: cmd += " --journal-devices {}".format( - ' '.join(journal_devices)) + ' '.join(journal_devices)) cmd += " --filestore" cmds.append(cmd) - - elif self.spec.objectstore == 'bluestore': - # for lvm batch we can just do all devices in one command - cmd = "lvm batch --no-auto {}".format(" ".join(data_devices)) - - if db_devices: - cmd += " --db-devices {}".format(" ".join(db_devices)) - - if wal_devices: - cmd += " --wal-devices {}".format(" ".join(wal_devices)) - - if self.spec.block_wal_size: - cmd += " --block-wal-size {}".format(self.spec.block_wal_size) - - if self.spec.block_db_size: - cmd += " --block-db-size {}".format(self.spec.block_db_size) - cmds.append(cmd) + else: + for d in devices.keys(): + data_devices: Optional[List[str]] = devices.get(d) + if not data_devices: + continue + + if self.spec.method == 'raw': + assert self.spec.objectstore == 'bluestore' + # ceph-volume raw prepare only support 1:1 ratio of data to db/wal devices + # for raw prepare each data device needs its own prepare command + dev_counter = 0 + # reversing the lists as we're assigning db_devices sequentially + db_devices.reverse() + wal_devices.reverse() + + while dev_counter < len(data_devices): + cmd = "raw prepare --bluestore" + cmd += " --data {}".format(data_devices[dev_counter]) + if db_devices: + cmd += " --block.db {}".format(db_devices.pop()) + if wal_devices: + cmd += " --block.wal {}".format(wal_devices.pop()) + if d in self._supported_device_classes: + cmd += " --crush-device-class {}".format(d) + + cmds.append(cmd) + dev_counter += 1 + + elif self.spec.objectstore == 'bluestore': + # for lvm batch we can just do all devices in one command + + cmd = "lvm batch --no-auto {}".format(" ".join(data_devices)) + + if db_devices: + cmd += " --db-devices {}".format(" ".join(db_devices)) + + if wal_devices: + cmd += " --wal-devices {}".format(" ".join(wal_devices)) + + if self.spec.block_wal_size: + cmd += " --block-wal-size {}".format(self.spec.block_wal_size) + + if self.spec.block_db_size: + cmd += " --block-db-size {}".format(self.spec.block_db_size) + + if d in self._supported_device_classes: + cmd += " --crush-device-class {}".format(d) + cmds.append(cmd) for i in range(len(cmds)): if self.spec.encrypted: @@ -109,8 +201,16 @@ class to_ceph_volume(object): cmds[i] += " --yes" cmds[i] += " --no-systemd" - if self.spec.crush_device_class: - cmds[i] += " --crush-device-class {}".format(self.spec.crush_device_class) + # set the --crush-device-class option when: + # - crush_device_class is specified at spec level (global for all the osds) # noqa E501 + # - 
crush_device_class is allowed + # - there's no override at osd level + if ( + self.spec.crush_device_class and + self.spec.crush_device_class in self._supported_device_classes and # noqa E501 + "crush-device-class" not in cmds[i] + ): + cmds[i] += " --crush-device-class {}".format(self.spec.crush_device_class) # noqa E501 if self.preview: cmds[i] += " --report" diff --git a/src/python-common/ceph/tests/test_drive_group.py b/src/python-common/ceph/tests/test_drive_group.py index 2fc66b737fcd0..faa001a0233ab 100644 --- a/src/python-common/ceph/tests/test_drive_group.py +++ b/src/python-common/ceph/tests/test_drive_group.py @@ -19,9 +19,10 @@ from ceph.deployment.drive_group import DriveGroupSpec, DeviceSelection, \ service_id: testing_drivegroup placement: host_pattern: hostname +crush_device_class: ssd data_devices: paths: - - /dev/sda + - /dev/sda """ ), ( @@ -31,14 +32,19 @@ placement: host_pattern: hostname data_devices: paths: - - /dev/sda""" + - path: /dev/sda + crush_device_class: ssd""" ), ]) def test_DriveGroup(test_input): + dg = DriveGroupSpec.from_json(yaml.safe_load(test_input)) assert dg.service_id == 'testing_drivegroup' assert all([isinstance(x, Device) for x in dg.data_devices.paths]) - assert dg.data_devices.paths[0].path == '/dev/sda' + if isinstance(dg.data_devices.paths[0].path, str): + assert dg.data_devices.paths[0].path == '/dev/sda' + + @pytest.mark.parametrize("match,test_input", [ @@ -301,6 +307,129 @@ def test_ceph_volume_command_9(): cmds = translate.to_ceph_volume(sel, []).run() assert all(cmd == 'lvm batch --no-auto /dev/sda /dev/sdb --data-allocate-fraction 0.8 --yes --no-systemd' for cmd in cmds), f'Expected {cmd} in {cmds}' + +@pytest.mark.parametrize("test_input_base", +[ + ( + """service_type: osd +service_id: testing_drivegroup +placement: + host_pattern: hostname +crush_device_class: ssd +data_devices: + paths: + - /dev/sda +""" + ), + ]) +def test_ceph_volume_command_10(test_input_base): + spec = DriveGroupSpec.from_json(yaml.safe_load(test_input_base)) + spec.validate() + drive = drive_selection.DriveSelection(spec, spec.data_devices.paths) + cmds = translate.to_ceph_volume(drive, []).run() + + assert all(cmd == 'lvm batch --no-auto /dev/sda --crush-device-class ssd --yes --no-systemd' for cmd in cmds), f'Expected {cmd} in {cmds}' + + +@pytest.mark.parametrize("test_input1", +[ + ( + """service_type: osd +service_id: testing_drivegroup +placement: + host_pattern: hostname +crush_device_class: ssd +data_devices: + paths: + - path: /dev/sda + crush_device_class: hdd + - path: /dev/sdb + crush_device_class: hdd +""" + ), + ]) +def test_ceph_volume_command_11(test_input1): + spec = DriveGroupSpec.from_json(yaml.safe_load(test_input1)) + spec.validate() + drive = drive_selection.DriveSelection(spec, spec.data_devices.paths) + cmds = translate.to_ceph_volume(drive, []).run() + + assert all(cmd == 'lvm batch --no-auto /dev/sda /dev/sdb --crush-device-class hdd --yes --no-systemd' for cmd in cmds), f'Expected {cmd} in {cmds}' + + +@pytest.mark.parametrize("test_input2", +[ + ( + """service_type: osd +service_id: testing_drivegroup +placement: + host_pattern: hostname +crush_device_class: ssd +data_devices: + paths: + - path: /dev/sda + crush_device_class: hdd + - path: /dev/sdb +""" + ), + ]) +def test_ceph_volume_command_12(test_input2): + + spec = DriveGroupSpec.from_json(yaml.safe_load(test_input2)) + spec.validate() + drive = drive_selection.DriveSelection(spec, spec.data_devices.paths) + cmds = translate.to_ceph_volume(drive, []).run() + + assert (cmds[0] 
== 'lvm batch --no-auto /dev/sdb --crush-device-class ssd --yes --no-systemd') # noqa E501 + assert (cmds[1] == 'lvm batch --no-auto /dev/sda --crush-device-class hdd --yes --no-systemd') # noqa E501 + + +@pytest.mark.parametrize("test_input3", +[ + ( + """service_type: osd +service_id: testing_drivegroup +placement: + host_pattern: hostname +data_devices: + paths: + - path: /dev/sda + crush_device_class: hdd + - path: /dev/sdb +""" + ), + ]) +def test_ceph_volume_command_13(test_input3): + + spec = DriveGroupSpec.from_json(yaml.safe_load(test_input3)) + spec.validate() + drive = drive_selection.DriveSelection(spec, spec.data_devices.paths) + cmds = translate.to_ceph_volume(drive, []).run() + + assert (cmds[0] == 'lvm batch --no-auto /dev/sdb --yes --no-systemd') # noqa E501 + assert (cmds[1] == 'lvm batch --no-auto /dev/sda --crush-device-class hdd --yes --no-systemd') # noqa E501 + + +@pytest.mark.parametrize("test_input4", +[ + ( + """service_type: osd +service_id: testing_drivegroup +placement: + host_pattern: hostname +data_devices: + paths: + - crush_device_class: hdd +""" + ), + ]) +def test_ceph_volume_command_14(test_input4): + + with pytest.raises(DriveGroupValidationError, match='Device path'): + spec = DriveGroupSpec.from_json(yaml.safe_load(test_input4)) + spec.validate() + + def test_raw_ceph_volume_command_0(): spec = DriveGroupSpec(placement=PlacementSpec(host_pattern='*'), service_id='foobar', @@ -334,3 +463,118 @@ def test_raw_ceph_volume_command_1(): sel = drive_selection.DriveSelection(spec, inventory) with pytest.raises(ValueError): cmds = translate.to_ceph_volume(sel, []).run() + +@pytest.mark.parametrize("test_input5", +[ + ( + """service_type: osd +service_id: testing_drivegroup +placement: + host_pattern: hostname +method: raw +data_devices: + paths: + - path: /dev/sda + crush_device_class: hdd + - path: /dev/sdb + crush_device_class: hdd + - path: /dev/sdc + crush_device_class: hdd +db_devices: + paths: + - /dev/sdd + - /dev/sde + - /dev/sdf + +""" + ), + ]) +def test_raw_ceph_volume_command_2(test_input5): + + spec = DriveGroupSpec.from_json(yaml.safe_load(test_input5)) + spec.validate() + drive = drive_selection.DriveSelection(spec, spec.data_devices.paths) + cmds = translate.to_ceph_volume(drive, []).run() + + assert cmds[0] == 'raw prepare --bluestore --data /dev/sda --block.db /dev/sdd --crush-device-class hdd' + assert cmds[1] == 'raw prepare --bluestore --data /dev/sdb --block.db /dev/sde --crush-device-class hdd' + assert cmds[2] == 'raw prepare --bluestore --data /dev/sdc --block.db /dev/sdf --crush-device-class hdd' + + +@pytest.mark.parametrize("test_input6", +[ + ( + """service_type: osd +service_id: testing_drivegroup +placement: + host_pattern: hostname +method: raw +data_devices: + paths: + - path: /dev/sda + crush_device_class: hdd + - path: /dev/sdb + crush_device_class: hdd + - path: /dev/sdc + crush_device_class: ssd +db_devices: + paths: + - /dev/sdd + - /dev/sde + - /dev/sdf + +""" + ), + ]) +def test_raw_ceph_volume_command_3(test_input6): + + spec = DriveGroupSpec.from_json(yaml.safe_load(test_input6)) + spec.validate() + drive = drive_selection.DriveSelection(spec, spec.data_devices.paths) + cmds = translate.to_ceph_volume(drive, []).run() + + assert cmds[0] == 'raw prepare --bluestore --data /dev/sda --block.db /dev/sdd --crush-device-class hdd' + assert cmds[1] == 'raw prepare --bluestore --data /dev/sdb --block.db /dev/sde --crush-device-class hdd' + assert cmds[2] == 'raw prepare --bluestore --data /dev/sdc --block.db /dev/sdf 
--crush-device-class ssd' + + +@pytest.mark.parametrize("test_input7", +[ + ( + """service_type: osd +service_id: testing_drivegroup +placement: + host_pattern: hostname +method: raw +data_devices: + paths: + - path: /dev/sda + crush_device_class: hdd + - path: /dev/sdb + crush_device_class: nvme + - path: /dev/sdc + crush_device_class: ssd +db_devices: + paths: + - /dev/sdd + - /dev/sde + - /dev/sdf +wal_devices: + paths: + - /dev/sdg + - /dev/sdh + - /dev/sdi + +""" + ), + ]) +def test_raw_ceph_volume_command_4(test_input7): + + spec = DriveGroupSpec.from_json(yaml.safe_load(test_input7)) + spec.validate() + drive = drive_selection.DriveSelection(spec, spec.data_devices.paths) + cmds = translate.to_ceph_volume(drive, []).run() + + assert cmds[0] == 'raw prepare --bluestore --data /dev/sda --block.db /dev/sdd --block.wal /dev/sdg --crush-device-class hdd' + assert cmds[1] == 'raw prepare --bluestore --data /dev/sdb --block.db /dev/sdf --block.wal /dev/sdi --crush-device-class nvme' + assert cmds[2] == 'raw prepare --bluestore --data /dev/sdc --block.db /dev/sde --block.wal /dev/sdh --crush-device-class ssd' -- 2.39.5
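
For a quick end-to-end illustration of the behaviour this patch adds, the sketch below (modeled on `test_ceph_volume_command_12` above, and not part of the patch itself) loads a spec that combines a global `crush_device_class` with a per-device override and prints the `ceph-volume` commands the translator produces. The `service_id` is made up for the example; the imports, calls, and expected output follow the unit tests in this patch.

.. code-block:: python

    import yaml

    from ceph.deployment import drive_selection, translate
    from ceph.deployment.drive_group import DriveGroupSpec

    # A spec with a global crush_device_class (ssd) and a per-device
    # override (hdd) on /dev/sda; mirrors test_ceph_volume_command_12.
    spec_yaml = """
    service_type: osd
    service_id: per_osd_class_example
    placement:
      host_pattern: hostname
    crush_device_class: ssd
    data_devices:
      paths:
        - path: /dev/sda
          crush_device_class: hdd
        - path: /dev/sdb
    """

    spec = DriveGroupSpec.from_json(yaml.safe_load(spec_yaml))
    spec.validate()

    # The unit tests feed the spec's own Device list as the "inventory",
    # which is enough to exercise the translation layer on its own.
    selection = drive_selection.DriveSelection(spec, spec.data_devices.paths)
    for cmd in translate.to_ceph_volume(selection, []).run():
        print(cmd)

    # Expected output (per the assertions in test_ceph_volume_command_12):
    #   lvm batch --no-auto /dev/sdb --crush-device-class ssd --yes --no-systemd
    #   lvm batch --no-auto /dev/sda --crush-device-class hdd --yes --no-systemd

The two commands fall out of `prepare_devices()`: `/dev/sdb` carries no per-device class, so it inherits the spec-level `ssd` group, while `/dev/sda` is collected under its own `hdd` group, and each non-empty group is translated into its own `lvm batch` invocation.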