From: Guillaume Abrioux Date: Tue, 5 May 2026 07:02:33 +0000 (+0200) Subject: ceph-volume: detect rotational media under dm-crypt for workqueue bypass X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=21a98b1405cc45dddd66f389b8f511c576f6451f;p=ceph.git ceph-volume: detect rotational media under dm-crypt for workqueue bypass bypass_workqueue() was inspecting the top-level block device (e.g. /dev/mapper/*) when deciding whether to disable the read/write workqueues for NVMe devices. It must look at the real disk under dm-crypt/LVM, not the mapper. On OSD block paths the top-level device often misreports its rotational flag, so the --perf-no_read_workqueue/--perf-no_write_workqueue decision was wrong. This fix walks sysfs 'slaves/' down to the leaf block devices, then checks rotational status there (udev properties first, then sysfs queue/rotational). Fixes: https://tracker.ceph.com/issues/76805 Signed-off-by: Guillaume Abrioux --- diff --git a/src/ceph-volume/ceph_volume/tests/util/test_disk.py b/src/ceph-volume/ceph_volume/tests/util/test_disk.py index 76e2b3458d9..2eb0ea63706 100644 --- a/src/ceph-volume/ceph_volume/tests/util/test_disk.py +++ b/src/ceph-volume/ceph_volume/tests/util/test_disk.py @@ -76,6 +76,64 @@ class TestLsblkParser(object): assert result['SIZE'] == '10M' +class TestBackingDeviceIsRotational(object): + """Upper dm nodes lie in sysfs queue/rotational; we walk slaves/ to the leaf.""" + + @patch('os.path.realpath') + def test_sysfs_dm_stack_to_nvme(self, m_realpath, fake_filesystem): + m_realpath.return_value = '/dev/dm-5' + fake_filesystem.create_dir('/sys/block/dm-5/slaves/dm-4') + fake_filesystem.create_dir('/sys/block/dm-4/slaves/nvme0n1') + fake_filesystem.create_file('/sys/block/nvme0n1/queue/rotational', contents='0') + assert disk.BackingDeviceRotation.is_rotational('/dev/ceph-foo/osd-block-bar') is False + + @patch('os.path.realpath') + def test_plain_disk_reads_leaf_rotational(self, m_realpath, fake_filesystem): + m_realpath.return_value = '/dev/nvme0n1' +
fake_filesystem.create_file('/sys/block/nvme0n1/queue/rotational', contents='0') + assert disk.BackingDeviceRotation.is_rotational('/dev/nvme0n1') is False + + @patch('ceph_volume.util.disk.get_partitions', return_value={'nvme0n1p1': 'nvme0n1'}) + @patch('os.path.realpath') + def test_partition_uses_parent_block_rotational( + self, m_realpath, m_get_partitions, fake_filesystem): + m_realpath.return_value = '/dev/nvme0n1p1' + fake_filesystem.create_file('/sys/block/nvme0n1/queue/rotational', contents='0') + assert disk.BackingDeviceRotation.is_rotational('/dev/nvme0n1p1') is False + + @patch('os.path.realpath') + def test_missing_sys_block_defaults_rotational(self, m_realpath, fake_filesystem): + m_realpath.return_value = '/dev/dm-99' + assert disk.BackingDeviceRotation.is_rotational('/dev/mapper/x') is True + + @patch('ceph_volume.util.disk.get_partitions', return_value={'sda1': 'sda'}) + @patch('os.path.realpath') + def test_dm_slave_partition_resolves_to_parent( + self, m_realpath, m_get_partitions, fake_filesystem): + m_realpath.return_value = '/dev/dm-0' + fake_filesystem.create_dir('/sys/block/dm-0/slaves/sda1') + fake_filesystem.create_file('/sys/block/sda/queue/rotational', contents='0') + assert disk.BackingDeviceRotation.is_rotational('/dev/dm-0') is False + + @patch('os.path.realpath') + def test_multi_slave_any_rotational(self, m_realpath, fake_filesystem): + m_realpath.return_value = '/dev/dm-0' + fake_filesystem.create_dir('/sys/block/dm-0/slaves/sda') + fake_filesystem.create_dir('/sys/block/dm-0/slaves/sdb') + fake_filesystem.create_file('/sys/block/sda/queue/rotational', contents='0') + fake_filesystem.create_file('/sys/block/sdb/queue/rotational', contents='1') + assert disk.BackingDeviceRotation.is_rotational('/dev/dm-0') is True + + @patch('os.listdir', side_effect=OSError(errno.EACCES, 'Permission denied')) + @patch('os.path.realpath') + def test_listdir_slaves_oserror_defaults_rotational( + self, m_realpath, m_listdir, fake_filesystem): + 
m_realpath.return_value = '/dev/dm-0' + fake_filesystem.create_dir('/sys/block/dm-0/slaves') + fake_filesystem.create_file('/sys/block/dm-0/queue/rotational', contents='0') + assert disk.BackingDeviceRotation.is_rotational('/dev/dm-0') is True + + class TestBlkidParser(object): def test_parses_whitespace_values(self): diff --git a/src/ceph-volume/ceph_volume/util/disk.py b/src/ceph-volume/ceph_volume/util/disk.py index 3efbeae0ae5..d3353cf9d00 100644 --- a/src/ceph-volume/ceph_volume/util/disk.py +++ b/src/ceph-volume/ceph_volume/util/disk.py @@ -222,6 +222,114 @@ def lsblk(device, columns=None, abspath=False): return result[0] + +class BackingDeviceRotation(object): + # Typical ceph-volume stacks are a few dm/LVM layers (eg: crypt over LV over disk). + # 32 leaves headroom for multipath/MD without unbounded sysfs recursion if slaves/ + # forms a cycle or an unexpectedly deep mapper chain. + _SYSFS_SLAVES_WALK_MAX_DEPTH = 32 + + @staticmethod + def _kname_from_path(device: str) -> str: + if not device: + return '' + try: + return os.path.basename(os.path.realpath(device)) + except OSError: + return '' + + @staticmethod + def _kname_for_sysfs_walk(kname: str) -> str: + if not kname: + return '' + if os.path.isdir(os.path.join('/sys/block', kname)): + return kname + try: + parent = get_partitions().get(kname) + except OSError as exc: + logger.debug('failed to resolve partition parent for %s: %s', kname, exc) + parent = None + if parent: + return parent + return kname + + @staticmethod + def _walk_sysfs_leaf_blocks(k: str, depth: int, found: set, seen: set) -> None: + k = BackingDeviceRotation._kname_for_sysfs_walk(k) + if not k or k in seen: + return + if depth > BackingDeviceRotation._SYSFS_SLAVES_WALK_MAX_DEPTH: + logger.warning( + 'sysfs slaves walk exceeded max depth %s at %s', + BackingDeviceRotation._SYSFS_SLAVES_WALK_MAX_DEPTH, + k, + ) + return + seen.add(k) + sys_block = os.path.join('/sys/block', k) + if not os.path.isdir(sys_block): + return + slaves_dir = 
os.path.join(sys_block, 'slaves') + slave_names: List[str] = [] + if os.path.isdir(slaves_dir): + try: + slave_names = os.listdir(slaves_dir) + except OSError as exc: + logger.debug( + 'failed to list sysfs slaves for %s: %s', slaves_dir, exc) + return + if not slave_names: + found.add(k) + return + for sn in slave_names: + BackingDeviceRotation._walk_sysfs_leaf_blocks( + BackingDeviceRotation._kname_for_sysfs_walk(sn), + depth + 1, + found, + seen, + ) + + @staticmethod + def _sysfs_leaf_block_knames(kname: str) -> List[str]: + found = set() + seen = set() + BackingDeviceRotation._walk_sysfs_leaf_blocks(kname, 0, found, seen) + return sorted(found) + + @staticmethod + def _leaf_block_is_rotational(kname: str) -> bool: + kname = BackingDeviceRotation._kname_for_sysfs_walk(kname) + dev_path = os.path.join('/dev', kname) + if os.path.exists(dev_path): + try: + udev_data = UdevData(dev_path) + env = udev_data.environment + if env.get('ID_SSD') == '1': + return False + rpm = env.get('ID_ATA_ROTATION_RATE_RPM', '') + if rpm.isdigit(): + return int(rpm) > 0 + except (RuntimeError, OSError, ValueError) as exc: + logger.debug( + 'failed to read udev rotational hints for %s: %s', + dev_path, exc) + + sys_block = os.path.join('/sys/block', kname) + rota = get_file_contents( + os.path.join(sys_block, 'queue/rotational'), '1') + return rota == '1' + + @staticmethod + def is_rotational(device: str) -> bool: + kname = BackingDeviceRotation._kname_from_path(device) + walk_root = BackingDeviceRotation._kname_for_sysfs_walk(kname) + leaves = BackingDeviceRotation._sysfs_leaf_block_knames(walk_root) + if not leaves: + return True + return any( + BackingDeviceRotation._leaf_block_is_rotational(leaf) for leaf in leaves) + + def lsblk_all(device: str = '', columns: Optional[List[str]] = None, abspath: bool = False) -> List[Dict[str, str]]: diff --git a/src/ceph-volume/ceph_volume/util/encryption.py b/src/ceph-volume/ceph_volume/util/encryption.py index 06a24b13c4c..367f01b613c 100644 
--- a/src/ceph-volume/ceph_volume/util/encryption.py +++ b/src/ceph-volume/ceph_volume/util/encryption.py @@ -8,7 +8,13 @@ from ceph_volume import process, conf, terminal from ceph_volume.util import constants, system from ceph_volume.util.device import Device from .prepare import write_keyring -from .disk import lsblk, device_family, get_part_entry_type, _dd_read +from .disk import ( + lsblk, + device_family, + get_part_entry_type, + _dd_read, + BackingDeviceRotation, +) from packaging import version from typing import Any, Dict, List, Optional @@ -65,7 +71,8 @@ def set_dmcrypt_no_workqueue(target_version: str = '2.3.4') -> None: raise RuntimeError("Couldn't check the cryptsetup version.") def bypass_workqueue(device: str) -> bool: - return not Device(device).rotational and conf.dmcrypt_no_workqueue + return (not BackingDeviceRotation.is_rotational(device) + and conf.dmcrypt_no_workqueue) def get_key_size_from_conf(): """