git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
ceph-volume: detect rotational media under dm-crypt for workqueue bypass
author: Guillaume Abrioux <gabrioux@ibm.com>
Tue, 5 May 2026 07:02:33 +0000 (09:02 +0200)
committer: GitHub <noreply@github.com>
Tue, 26 May 2026 14:12:50 +0000 (14:12 +0000)
bypass_workqueue() was inspecting the top-level block device
(e.g. /dev/mapper/*) when deciding whether to disable read/write
workqueues for NVMe devices. It must look at the real disk under
dmcrypt/LVM, not the mapper. On OSD block paths the top device often
misreports the rotational flag, so the --perf-no_workqueue decision was wrong.

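This fix walks the sysfs 'slaves/' directories down to the leaf device(s),
then checks whether each leaf is rotational (udev properties first, then
sysfs queue/rotational). The device is treated as rotational if any leaf is.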

Fixes: https://tracker.ceph.com/issues/76805
Signed-off-by: Guillaume Abrioux <gabrioux@ibm.com>
src/ceph-volume/ceph_volume/tests/util/test_disk.py
src/ceph-volume/ceph_volume/util/disk.py
src/ceph-volume/ceph_volume/util/encryption.py

index 76e2b3458d92b80eaea05a5d51419f6931c591b0..2eb0ea63706486c5cd557e2f74de04dc6e124df7 100644 (file)
@@ -76,6 +76,64 @@ class TestLsblkParser(object):
         assert result['SIZE'] == '10M'
 
 
+class TestBackingDeviceIsRotational(object):
+    """Upper dm nodes lie in sysfs queue/rotational; we walk slaves/ to the leaf."""
+
+    @patch('os.path.realpath')
+    def test_sysfs_dm_stack_to_nvme(self, m_realpath, fake_filesystem):
+        m_realpath.return_value = '/dev/dm-5'
+        fake_filesystem.create_dir('/sys/block/dm-5/slaves/dm-4')
+        fake_filesystem.create_dir('/sys/block/dm-4/slaves/nvme0n1')
+        fake_filesystem.create_file('/sys/block/nvme0n1/queue/rotational', contents='0')
+        assert disk.BackingDeviceRotation.is_rotational('/dev/ceph-foo/osd-block-bar') is False
+
+    @patch('os.path.realpath')
+    def test_plain_disk_reads_leaf_rotational(self, m_realpath, fake_filesystem):
+        m_realpath.return_value = '/dev/nvme0n1'
+        fake_filesystem.create_file('/sys/block/nvme0n1/queue/rotational', contents='0')
+        assert disk.BackingDeviceRotation.is_rotational('/dev/nvme0n1') is False
+
+    @patch('ceph_volume.util.disk.get_partitions', return_value={'nvme0n1p1': 'nvme0n1'})
+    @patch('os.path.realpath')
+    def test_partition_uses_parent_block_rotational(
+            self, m_realpath, m_get_partitions, fake_filesystem):
+        m_realpath.return_value = '/dev/nvme0n1p1'
+        fake_filesystem.create_file('/sys/block/nvme0n1/queue/rotational', contents='0')
+        assert disk.BackingDeviceRotation.is_rotational('/dev/nvme0n1p1') is False
+
+    @patch('os.path.realpath')
+    def test_missing_sys_block_defaults_rotational(self, m_realpath, fake_filesystem):
+        m_realpath.return_value = '/dev/dm-99'
+        assert disk.BackingDeviceRotation.is_rotational('/dev/mapper/x') is True
+
+    @patch('ceph_volume.util.disk.get_partitions', return_value={'sda1': 'sda'})
+    @patch('os.path.realpath')
+    def test_dm_slave_partition_resolves_to_parent(
+            self, m_realpath, m_get_partitions, fake_filesystem):
+        m_realpath.return_value = '/dev/dm-0'
+        fake_filesystem.create_dir('/sys/block/dm-0/slaves/sda1')
+        fake_filesystem.create_file('/sys/block/sda/queue/rotational', contents='0')
+        assert disk.BackingDeviceRotation.is_rotational('/dev/dm-0') is False
+
+    @patch('os.path.realpath')
+    def test_multi_slave_any_rotational(self, m_realpath, fake_filesystem):
+        m_realpath.return_value = '/dev/dm-0'
+        fake_filesystem.create_dir('/sys/block/dm-0/slaves/sda')
+        fake_filesystem.create_dir('/sys/block/dm-0/slaves/sdb')
+        fake_filesystem.create_file('/sys/block/sda/queue/rotational', contents='0')
+        fake_filesystem.create_file('/sys/block/sdb/queue/rotational', contents='1')
+        assert disk.BackingDeviceRotation.is_rotational('/dev/dm-0') is True
+
+    @patch('os.listdir', side_effect=OSError(errno.EACCES, 'Permission denied'))
+    @patch('os.path.realpath')
+    def test_listdir_slaves_oserror_defaults_rotational(
+            self, m_realpath, m_listdir, fake_filesystem):
+        m_realpath.return_value = '/dev/dm-0'
+        fake_filesystem.create_dir('/sys/block/dm-0/slaves')
+        fake_filesystem.create_file('/sys/block/dm-0/queue/rotational', contents='0')
+        assert disk.BackingDeviceRotation.is_rotational('/dev/dm-0') is True
+
+
 class TestBlkidParser(object):
 
     def test_parses_whitespace_values(self):
index 3efbeae0ae5c21ff546962a12af9ba14ab50fe07..d3353cf9d008b83c17a2ae3a9881375c29ce0abc 100644 (file)
@@ -222,6 +222,114 @@ def lsblk(device, columns=None, abspath=False):
 
     return result[0]
 
+
+class BackingDeviceRotation(object):
+    # Typical ceph-volume stacks are a few dm/LVM layers (eg: crypt over LV over disk).
+    # 32 leaves headroom for multipath/MD without unbounded sysfs recursion if slaves/
+    # forms a cycle or an unexpectedly deep mapper chain.
+    _SYSFS_SLAVES_WALK_MAX_DEPTH = 32
+
+    @staticmethod
+    def _kname_from_path(device: str) -> str:
+        if not device:
+            return ''
+        try:
+            return os.path.basename(os.path.realpath(device))
+        except OSError:
+            return ''
+
+    @staticmethod
+    def _kname_for_sysfs_walk(kname: str) -> str:
+        if not kname:
+            return ''
+        if os.path.isdir(os.path.join('/sys/block', kname)):
+            return kname
+        try:
+            parent = get_partitions().get(kname)
+        except OSError as exc:
+            logger.debug('failed to resolve partition parent for %s: %s', kname, exc)
+            parent = None
+        if parent:
+            return parent
+        return kname
+
+    @staticmethod
+    def _walk_sysfs_leaf_blocks(k: str, depth: int, found: set, seen: set) -> None:
+        k = BackingDeviceRotation._kname_for_sysfs_walk(k)
+        if not k or k in seen:
+            return
+        if depth > BackingDeviceRotation._SYSFS_SLAVES_WALK_MAX_DEPTH:
+            logger.warning(
+                'sysfs slaves walk exceeded max depth %s at %s',
+                BackingDeviceRotation._SYSFS_SLAVES_WALK_MAX_DEPTH,
+                k,
+            )
+            return
+        seen.add(k)
+        sys_block = os.path.join('/sys/block', k)
+        if not os.path.isdir(sys_block):
+            return
+        slaves_dir = os.path.join(sys_block, 'slaves')
+        slave_names: List[str] = []
+        if os.path.isdir(slaves_dir):
+            try:
+                slave_names = os.listdir(slaves_dir)
+            except OSError as exc:
+                logger.debug(
+                    'failed to list sysfs slaves for %s: %s', slaves_dir, exc)
+                return
+        if not slave_names:
+            found.add(k)
+            return
+        for sn in slave_names:
+            BackingDeviceRotation._walk_sysfs_leaf_blocks(
+                BackingDeviceRotation._kname_for_sysfs_walk(sn),
+                depth + 1,
+                found,
+                seen,
+            )
+
+    @staticmethod
+    def _sysfs_leaf_block_knames(kname: str) -> List[str]:
+        found = set()
+        seen = set()
+        BackingDeviceRotation._walk_sysfs_leaf_blocks(kname, 0, found, seen)
+        return sorted(found)
+
+    @staticmethod
+    def _leaf_block_is_rotational(kname: str) -> bool:
+        kname = BackingDeviceRotation._kname_for_sysfs_walk(kname)
+        dev_path = os.path.join('/dev', kname)
+        if os.path.exists(dev_path):
+            try:
+                udev_data = UdevData(dev_path)
+                env = udev_data.environment
+                if env.get('ID_SSD') == '1':
+                    return False
+                rpm = env.get('ID_ATA_ROTATION_RATE_RPM', '')
+                if rpm.isdigit():
+                    return int(rpm) > 0
+            except (RuntimeError, OSError, ValueError) as exc:
+                logger.debug(
+                    'failed to read udev rotational hints for %s: %s',
+                    dev_path, exc)
+
+        sys_block = os.path.join('/sys/block', kname)
+        rota = get_file_contents(
+            os.path.join(sys_block, 'queue/rotational'), '1')
+        return rota == '1'
+
+    @staticmethod
+    def is_rotational(device: str) -> bool:
+        kname = BackingDeviceRotation._kname_from_path(device)
+        walk_root = BackingDeviceRotation._kname_for_sysfs_walk(kname)
+        leaves = BackingDeviceRotation._sysfs_leaf_block_knames(walk_root)
+        if not leaves:
+            return True
+        return any(
+            BackingDeviceRotation._leaf_block_is_rotational(leaf) for leaf in leaves)
+
+
 def lsblk_all(device: str = '',
               columns: Optional[List[str]] = None,
               abspath: bool = False) -> List[Dict[str, str]]:
index 06a24b13c4c7b9a7a7cbf69bc7c3965b977573fd..367f01b613c25e138cf2b787dfc02859ac266c1f 100644 (file)
@@ -8,7 +8,13 @@ from ceph_volume import process, conf, terminal
 from ceph_volume.util import constants, system
 from ceph_volume.util.device import Device
 from .prepare import write_keyring
-from .disk import lsblk, device_family, get_part_entry_type, _dd_read
+from .disk import (
+    lsblk,
+    device_family,
+    get_part_entry_type,
+    _dd_read,
+    BackingDeviceRotation,
+)
 from packaging import version
 from typing import Any, Dict, List, Optional
 
@@ -65,7 +71,8 @@ def set_dmcrypt_no_workqueue(target_version: str = '2.3.4') -> None:
         raise RuntimeError("Couldn't check the cryptsetup version.")
 
 def bypass_workqueue(device: str) -> bool:
-    return not Device(device).rotational and conf.dmcrypt_no_workqueue
+    return (not BackingDeviceRotation.is_rotational(device)
+            and conf.dmcrypt_no_workqueue)
 
 def get_key_size_from_conf():
     """