import logging
from textwrap import dedent
from ceph_volume import decorators, process
+from ceph_volume.util import disk
logger = logging.getLogger(__name__)
+
def direct_report(devices):
"""
Other non-cli consumers of listing information will want to consume the
_list = List([])
return _list.generate(devices)
+def _get_bluestore_info(dev):
+ out, err, rc = process.call([
+ 'ceph-bluestore-tool', 'show-label',
+ '--dev', dev], verbose_on_failure=False)
+ if rc:
+ # ceph-bluestore-tool returns an error (below) if device is not bluestore OSD
+ # > unable to read label for <device>: (2) No such file or directory
+ # but it's possible the error could be for a different reason (like if the disk fails)
+ logger.debug('assuming device {} is not BlueStore; ceph-bluestore-tool failed to get info from device: {}\n{}'.format(dev, out, err))
+ return None
+ oj = json.loads(''.join(out))
+ if dev not in oj:
+ # should be impossible, so warn
+ logger.warning('skipping device {} because it is not reported in ceph-bluestore-tool output: {}'.format(dev, out))
+ return None
+ try:
+ if oj[dev]['description'] != 'main':
+ # ignore non-main devices, for now
+ logger.info('ignoring non-main device {}'.format(dev))
+ return None
+ whoami = oj[dev]['whoami']
+ return {
+ 'type': 'bluestore',
+ 'osd_id': int(whoami),
+ 'osd_uuid': oj[dev]['osd_uuid'],
+ 'ceph_fsid': oj[dev]['ceph_fsid'],
+ 'device': dev
+ }
+ except KeyError as e:
+ # this will appear for devices that have a bluestore header but aren't valid OSDs
+ # for example, due to incomplete rollback of OSDs: https://tracker.ceph.com/issues/51869
+ logger.error('device {} does not have all BlueStore data needed to be a valid OSD: {}\n{}'.format(dev, out, e))
+ return None
+
class List(object):
self.argv = argv
def generate(self, devs=None):
- if not devs:
- logger.debug('Listing block devices via lsblk...')
+ logger.debug('Listing block devices via lsblk...')
+
+ if devs is None or devs == []:
devs = []
- # adding '--inverse' allows us to get the mapper devices list in that command output.
- # not listing root devices containing partitions shouldn't have side effect since we are
- # in `ceph-volume raw` context.
- #
- # example:
- # running `lsblk --paths --nodeps --output=NAME --noheadings` doesn't allow to get the mapper list
- # because the output is like following :
- #
- # $ lsblk --paths --nodeps --output=NAME --noheadings
- # /dev/sda
- # /dev/sdb
- # /dev/sdc
- # /dev/sdd
- #
- # the dmcrypt mappers are hidden because of the `--nodeps` given they are displayed as a dependency.
- #
- # $ lsblk --paths --output=NAME --noheadings
- # /dev/sda
- # |-/dev/mapper/ceph-3b52c90d-6548-407d-bde1-efd31809702f-sda-block-dmcrypt
- # `-/dev/mapper/ceph-3b52c90d-6548-407d-bde1-efd31809702f-sda-db-dmcrypt
- # /dev/sdb
- # /dev/sdc
- # /dev/sdd
- #
- # adding `--inverse` is a trick to get around this issue, the counterpart is that we can't list root devices if they contain
- # at least one partition but this shouldn't be an issue in `ceph-volume raw` context given we only deal with raw devices.
+ # If no devs are given initially, we want to list ALL devices including children and
+ # parents. Parent disks with child partitions may be the appropriate device to return if
+ # the parent disk has a bluestore header, but children may be the most appropriate
+ # devices to return if the parent disk does not have a bluestore header.
out, err, ret = process.call([
- 'lsblk', '--paths', '--nodeps', '--output=NAME', '--noheadings', '--inverse'
+ 'lsblk', '--paths', '--output=NAME', '--noheadings',
])
assert not ret
devs = out
+
result = {}
+ logger.debug('inspecting devices: {}'.format(devs))
for dev in devs:
- logger.debug('Examining %s' % dev)
- # bluestore?
- out, err, ret = process.call([
- 'ceph-bluestore-tool', 'show-label',
- '--dev', dev], verbose_on_failure=False)
- if ret:
- logger.debug('No label on %s' % dev)
- continue
- oj = json.loads(''.join(out))
- if dev not in oj:
+ info = disk.lsblk(dev, abspath=True)
+ # Linux kernels built with CONFIG_ATARI_PARTITION enabled can falsely interpret
+ # bluestore's on-disk format as an Atari partition table. These false Atari partitions
+ # can be interpreted as real OSDs if a bluestore OSD was previously created on the false
+ # partition. See https://tracker.ceph.com/issues/52060 for more info. If a device has a
+ # parent, it is a child. If the parent is a valid bluestore OSD, the child will only
+ # exist if it is a phantom Atari partition, and the child should be ignored. If the
+ # parent isn't bluestore, then the child could be a valid bluestore OSD. If we fail to
+ # determine whether a parent is bluestore, we should err on the side of not reporting
+ # the child so as not to give a false negative.
+ if 'PKNAME' in info and info['PKNAME'] != "":
+ parent = info['PKNAME']
+ try:
+ if disk.has_bluestore_label(parent):
+ logger.warning(('ignoring child device {} whose parent {} is a BlueStore OSD.'.format(dev, parent),
+ 'device is likely a phantom Atari partition. device info: {}'.format(info)))
+ continue
+ except OSError as e:
+ logger.error(('ignoring child device {} to avoid reporting invalid BlueStore data from phantom Atari partitions.'.format(dev),
+ 'failed to determine if parent device {} is BlueStore. err: {}'.format(parent, e)))
+ continue
+
+ bs_info = _get_bluestore_info(dev)
+ if bs_info is None:
+ # None is also returned in the rare event that there is an issue reading info from
+ # a BlueStore disk, so be sure to log our assumption that it isn't bluestore
+ logger.info('device {} does not have BlueStore information'.format(dev))
continue
- if oj[dev]['description'] != 'main':
- # ignore non-main devices, for now
- continue
- whoami = oj[dev]['whoami']
- result[oj[dev]['osd_uuid']] = {
- 'type': 'bluestore',
- 'osd_id': int(whoami),
- 'osd_uuid': oj[dev]['osd_uuid'],
- 'ceph_fsid': oj[dev]['ceph_fsid'],
- 'device': dev
- }
+ result[bs_info['osd_uuid']] = bs_info
+
return result
@decorators.needs_root
--- /dev/null
+import pytest
+from mock.mock import patch
+from ceph_volume.devices import raw
+
+# Sample lsblk output is below that overviews the test scenario. (--json output for reader clarity)
+# - sda and all its children are used for the OS
+# - sdb is a bluestore OSD with phantom Atari partitions
+# - sdc is an empty disk
+# - sdd has 2 LVM device children
+# > lsblk --paths --json
+# {
+# "blockdevices": [
+# {"name": "/dev/sda", "maj:min": "8:0", "rm": "0", "size": "128G", "ro": "0", "type": "disk", "mountpoint": null,
+# "children": [
+# {"name": "/dev/sda1", "maj:min": "8:1", "rm": "0", "size": "487M", "ro": "0", "type": "part", "mountpoint": null},
+# {"name": "/dev/sda2", "maj:min": "8:2", "rm": "0", "size": "1.9G", "ro": "0", "type": "part", "mountpoint": null},
+# {"name": "/dev/sda3", "maj:min": "8:3", "rm": "0", "size": "125.6G", "ro": "0", "type": "part", "mountpoint": "/etc/hosts"}
+# ]
+# },
+# {"name": "/dev/sdb", "maj:min": "8:16", "rm": "0", "size": "1T", "ro": "0", "type": "disk", "mountpoint": null,
+# "children": [
+# {"name": "/dev/sdb2", "maj:min": "8:18", "rm": "0", "size": "48G", "ro": "0", "type": "part", "mountpoint": null},
+# {"name": "/dev/sdb3", "maj:min": "8:19", "rm": "0", "size": "6M", "ro": "0", "type": "part", "mountpoint": null}
+# ]
+# },
+# {"name": "/dev/sdc", "maj:min": "8:32", "rm": "0", "size": "1T", "ro": "0", "type": "disk", "mountpoint": null},
+# {"name": "/dev/sdd", "maj:min": "8:48", "rm": "0", "size": "1T", "ro": "0", "type": "disk", "mountpoint": null,
+# "children": [
+# {"name": "/dev/mapper/ceph--osd--block--1", "maj:min": "253:0", "rm": "0", "size": "512G", "ro": "0", "type": "lvm", "mountpoint": null},
+# {"name": "/dev/mapper/ceph--osd--block--2", "maj:min": "253:1", "rm": "0", "size": "512G", "ro": "0", "type": "lvm", "mountpoint": null}
+# ]
+# }
+# ]
+# }
+
+def _devices_side_effect():
+ return {
+ "/dev/sda": {},
+ "/dev/sda1": {},
+ "/dev/sda2": {},
+ "/dev/sda3": {},
+ "/dev/sdb": {},
+ "/dev/sdb2": {},
+ "/dev/sdb3": {},
+ "/dev/sdc": {},
+ "/dev/sdd": {},
+ "/dev/mapper/ceph--osd--block--1": {},
+ "/dev/mapper/ceph--osd--block--2": {},
+ }
+
+def _lsblk_list_output():
+ return [
+ '/dev/sda',
+ '/dev/sda1',
+ '/dev/sda2',
+ '/dev/sda3',
+ '/dev/sdb',
+ '/dev/sdb2',
+ '/dev/sdb3',
+ '/dev/sdc',
+ '/dev/sdd',
+ '/dev/mapper/ceph--osd--block--1',
+ '/dev/mapper/ceph--osd--block--2',
+ ]
+
+# dummy lsblk output for device with optional parent output
+def _lsblk_output(dev, parent=None):
+ if parent is None:
+ parent = ""
+ ret = 'NAME="{}" KNAME="{}" PKNAME="{}"'.format(dev, dev, parent)
+ return [ret] # needs to be in a list form
+
+def _bluestore_tool_label_output_sdb():
+ return '''{
+ "/dev/sdb": {
+ "osd_uuid": "sdb-uuid",
+ "size": 1099511627776,
+ "btime": "2021-07-23T16:02:22.809186+0000",
+ "description": "main",
+ "bfm_blocks": "268435456",
+ "bfm_blocks_per_key": "128",
+ "bfm_bytes_per_block": "4096",
+ "bfm_size": "1099511627776",
+ "bluefs": "1",
+ "ceph_fsid": "sdb-fsid",
+ "kv_backend": "rocksdb",
+ "magic": "ceph osd volume v026",
+ "mkfs_done": "yes",
+ "osd_key": "AQAO6PpgK+y4CBAAixq/X7OVimbaezvwD/cDmg==",
+ "ready": "ready",
+ "require_osd_release": "16",
+ "whoami": "0"
+ }
+}'''
+
+def _bluestore_tool_label_output_sdb2():
+ return '''{
+ "/dev/sdb2": {
+ "osd_uuid": "sdb2-uuid",
+ "size": 1099511627776,
+ "btime": "2021-07-23T16:02:22.809186+0000",
+ "description": "main",
+ "bfm_blocks": "268435456",
+ "bfm_blocks_per_key": "128",
+ "bfm_bytes_per_block": "4096",
+ "bfm_size": "1099511627776",
+ "bluefs": "1",
+ "ceph_fsid": "sdb2-fsid",
+ "kv_backend": "rocksdb",
+ "magic": "ceph osd volume v026",
+ "mkfs_done": "yes",
+ "osd_key": "AQAO6PpgK+y4CBAAixq/X7OVimbaezvwD/cDmg==",
+ "ready": "ready",
+ "require_osd_release": "16",
+ "whoami": "2"
+ }
+}'''
+
+def _bluestore_tool_label_output_dm_okay():
+ return '''{
+ "/dev/mapper/ceph--osd--block--1": {
+ "osd_uuid": "lvm-1-uuid",
+ "size": 549751619584,
+ "btime": "2021-07-23T16:04:37.881060+0000",
+ "description": "main",
+ "bfm_blocks": "134216704",
+ "bfm_blocks_per_key": "128",
+ "bfm_bytes_per_block": "4096",
+ "bfm_size": "549751619584",
+ "bluefs": "1",
+ "ceph_fsid": "lvm-1-fsid",
+ "kv_backend": "rocksdb",
+ "magic": "ceph osd volume v026",
+ "mkfs_done": "yes",
+ "osd_key": "AQCU6Ppgz+UcIRAAh6IUjtPjiXBlEXfwO8ixzw==",
+ "ready": "ready",
+ "require_osd_release": "16",
+ "whoami": "2"
+ }
+}'''
+
+def _process_call_side_effect(command, **kw):
+ if "lsblk" in command:
+ if "/dev/" in command[-1]:
+ dev = command[-1]
+ if dev == "/dev/sda1" or dev == "/dev/sda2" or dev == "/dev/sda3":
+ return _lsblk_output(dev, parent="/dev/sda"), '', 0
+ if dev == "/dev/sdb2" or dev == "/dev/sdb3":
+ return _lsblk_output(dev, parent="/dev/sdb"), '', 0
+ if dev == "/dev/sda" or dev == "/dev/sdb" or dev == "/dev/sdc" or dev == "/dev/sdd":
+ return _lsblk_output(dev), '', 0
+ if "mapper" in dev:
+ return _lsblk_output(dev, parent="/dev/sdd"), '', 0
+ pytest.fail('dev {} needs behavior specified for it'.format(dev))
+ if "/dev/" not in command:
+ return _lsblk_list_output(), '', 0
+ pytest.fail('command {} needs behavior specified for it'.format(command))
+
+ if "ceph-bluestore-tool" in command:
+ if "/dev/sdb" in command:
+ # sdb is a bluestore OSD
+ return _bluestore_tool_label_output_sdb(), '', 0
+ if "/dev/sdb2" in command:
+ # sdb2 is a phantom atari partition that appears to have some valid bluestore info
+ return _bluestore_tool_label_output_sdb2(), '', 0
+ if "/dev/mapper/ceph--osd--block--1" in command:
+ # dm device 1 is a valid bluestore OSD (the other is corrupted/invalid)
+ return _bluestore_tool_label_output_dm_okay(), '', 0
+ # sda and children, sdb's children, sdc, sdd, dm device 2 all do NOT have bluestore OSD data
+ return [], 'fake No such file or directory error', 1
+ pytest.fail('command {} needs behavior specified for it'.format(command))
+
+def _has_bluestore_label_side_effect(disk_path):
+ if "/dev/sda" in disk_path:
+ return False # disk and all children are for the OS
+ if disk_path == "/dev/sdb":
+ return True # sdb is a valid bluestore OSD
+ if disk_path == "/dev/sdb2":
+ return True # sdb2 appears to be a valid bluestore OSD even though it should not be
+ if disk_path == "/dev/sdc":
+ return False # empty disk
+ if disk_path == "/dev/sdd":
+ return False # has LVM subdevices
+ if disk_path == "/dev/mapper/ceph--osd--block--1":
+ return True # good OSD
+ if disk_path == "/dev/mapper/ceph--osd--block--2":
+ return False # corrupted
+ pytest.fail('device {} needs behavior specified for it'.format(disk_path))
+
+class TestList(object):
+
+ @patch('ceph_volume.util.device.disk.get_devices')
+ @patch('ceph_volume.util.disk.has_bluestore_label')
+ @patch('ceph_volume.process.call')
+ def test_raw_list(self, patched_call, patched_bluestore_label, patched_get_devices):
+ raw.list.logger.setLevel("DEBUG")
+ patched_call.side_effect = _process_call_side_effect
+ patched_bluestore_label.side_effect = _has_bluestore_label_side_effect
+ patched_get_devices.side_effect = _devices_side_effect
+
+ result = raw.list.List([]).generate()
+ assert len(result) == 2
+
+ sdb = result['sdb-uuid']
+ assert sdb['osd_uuid'] == 'sdb-uuid'
+ assert sdb['osd_id'] == 0
+ assert sdb['device'] == '/dev/sdb'
+ assert sdb['ceph_fsid'] == 'sdb-fsid'
+ assert sdb['type'] == 'bluestore'
+
+ lvm1 = result['lvm-1-uuid']
+ assert lvm1['osd_uuid'] == 'lvm-1-uuid'
+ assert lvm1['osd_id'] == 2
+ assert lvm1['device'] == '/dev/mapper/ceph--osd--block--1'
+ assert lvm1['ceph_fsid'] == 'lvm-1-fsid'
+ assert lvm1['type'] == 'bluestore'
+
+ @patch('ceph_volume.util.device.disk.get_devices')
+ @patch('ceph_volume.util.disk.has_bluestore_label')
+ @patch('ceph_volume.process.call')
+ def test_raw_list_with_OSError(self, patched_call, patched_bluestore_label, patched_get_devices):
+ def _has_bluestore_label_side_effect_with_OSError(device_path):
+ if device_path == "/dev/sdd":
+ raise OSError('fake OSError')
+ return _has_bluestore_label_side_effect(device_path)
+
+ raw.list.logger.setLevel("DEBUG")
+ patched_call.side_effect = _process_call_side_effect
+ patched_bluestore_label.side_effect = _has_bluestore_label_side_effect_with_OSError
+ patched_get_devices.side_effect = _devices_side_effect
+
+ result = raw.list.List([]).generate()
+ assert len(result) == 1
+ assert 'sdb-uuid' in result
def test_is_partition(self, device_info):
data = {"/dev/sda1": {"foo": "bar"}}
- lsblk = {"TYPE": "part"}
+ lsblk = {"TYPE": "part", "PKNAME": "sda"}
device_info(devices=data, lsblk=lsblk)
disk = device.Device("/dev/sda1")
assert disk.is_partition
def test_is_not_lvm_memeber(self, device_info):
data = {"/dev/sda1": {"foo": "bar"}}
- lsblk = {"TYPE": "part"}
+ lsblk = {"TYPE": "part", "PKNAME": "sda"}
device_info(devices=data, lsblk=lsblk)
disk = device.Device("/dev/sda1")
assert not disk.is_lvm_member
def test_is_lvm_memeber(self, device_info):
data = {"/dev/sda1": {"foo": "bar"}}
- lsblk = {"TYPE": "part"}
+ lsblk = {"TYPE": "part", "PKNAME": "sda"}
device_info(devices=data, lsblk=lsblk)
disk = device.Device("/dev/sda1")
assert not disk.is_lvm_member
def test_used_by_ceph(self, device_info,
monkeypatch, ceph_type):
data = {"/dev/sda": {"foo": "bar"}}
- lsblk = {"TYPE": "part"}
+ lsblk = {"TYPE": "part", "PKNAME": "sda"}
FooPVolume = api.PVolume(pv_name='/dev/sda', pv_uuid="0000",
lv_uuid="0000", pv_tags={}, vg_name="vg")
pvolumes = []
pvolumes = []
pvolumes.append(FooPVolume)
data = {"/dev/sda": {"foo": "bar"}}
- lsblk = {"TYPE": "part"}
+ lsblk = {"TYPE": "part", "PKNAME": "sda"}
lv_data = {"lv_path": "vg/lv", "vg_name": "vg", "lv_uuid": "0000", "tags": {"ceph.osd_id": 0, "ceph.type": "journal"}}
monkeypatch.setattr(api, 'get_pvs', lambda **kwargs: pvolumes)
class TestDeviceEncryption(object):
def test_partition_is_not_encrypted_lsblk(self, device_info):
- lsblk = {'TYPE': 'part', 'FSTYPE': 'xfs'}
+ lsblk = {'TYPE': 'part', 'FSTYPE': 'xfs', 'PKNAME': 'sda'}
device_info(lsblk=lsblk)
disk = device.Device("/dev/sda")
assert disk.is_encrypted is False
def test_partition_is_encrypted_lsblk(self, device_info):
- lsblk = {'TYPE': 'part', 'FSTYPE': 'crypto_LUKS'}
+ lsblk = {'TYPE': 'part', 'FSTYPE': 'crypto_LUKS', 'PKNAME': 'sda'}
device_info(lsblk=lsblk)
disk = device.Device("/dev/sda")
assert disk.is_encrypted is True
def test_partition_is_not_encrypted_blkid(self, device_info):
- lsblk = {'TYPE': 'part'}
+ lsblk = {'TYPE': 'part', 'PKNAME': 'sda'}
blkid = {'TYPE': 'ceph data'}
device_info(lsblk=lsblk, blkid=blkid)
disk = device.Device("/dev/sda")
assert disk.is_encrypted is False
def test_partition_is_encrypted_blkid(self, device_info):
- lsblk = {'TYPE': 'part'}
+ lsblk = {'TYPE': 'part', 'PKNAME': 'sda'}
blkid = {'TYPE': 'crypto_LUKS'}
device_info(lsblk=lsblk, blkid=blkid)
disk = device.Device("/dev/sda")
def size(self):
return self.sys_api['size']
+ @property
+ def parent_device(self):
+ if 'PKNAME' in self.disk_api:
+ return '/dev/%s' % self.disk_api['PKNAME']
+ return None
+
@property
def lvm_size(self):
"""
@property
def has_bluestore_label(self):
- isBluestore = False
- bluestoreDiskSignature = 'bluestore block device' # 22 bytes long
-
- # throws OSError on failure
- with open(self.abspath, "rb") as fd:
- # read first 22 bytes looking for bluestore disk signature
- signature = fd.read(22)
- if signature.decode('ascii', 'replace') == bluestoreDiskSignature:
- isBluestore = True
-
- return isBluestore
+ return disk.has_bluestore_label(self.abspath)
@property
def is_mapper(self):
except OSError as e:
# likely failed to open the device. assuming it is BlueStore is the safest option
# so that a possibly-already-existing OSD doesn't get overwritten
- logger.error('failed to determine if device {} is Bluestore. device should not be used to avoid false negatives. err: {}'.format(self.abspath, e))
+ logger.error('failed to determine if device {} is BlueStore. device should not be used to avoid false negatives. err: {}'.format(self.abspath, e))
rejected.append('Failed to determine if device is BlueStore')
+ if self.is_partition:
+ try:
+ if disk.has_bluestore_label(self.parent_device):
+ rejected.append('Parent has BlueStore device label')
+ except OSError as e:
+ # likely failed to open the device. assuming the parent is BlueStore is the safest
+ # option so that a possibly-already-existing OSD doesn't get overwritten
+ logger.error('failed to determine if partition {} (parent: {}) has a BlueStore parent. partition should not be used to avoid false negatives. err: {}'.format(self.abspath, self.parent_device, e))
+ rejected.append('Failed to determine if parent device is BlueStore')
+
if self.has_gpt_headers:
rejected.append('Has GPT headers')
return rejected
:param device: A ``Device()`` object
"""
- parent_device = '/dev/%s' % device.disk_api['PKNAME']
udev_info = udevadm_property(device.abspath)
partition_number = udev_info.get('ID_PART_ENTRY_NUMBER')
if not partition_number:
raise RuntimeError('Unable to detect the partition number for device: %s' % device.abspath)
process.run(
- ['parted', parent_device, '--script', '--', 'rm', partition_number]
+ ['parted', device.parent_device, '--script', '--', 'rm', partition_number]
)
device_facts[diskname] = metadata
return device_facts
+
+def has_bluestore_label(device_path):
+ isBluestore = False
+ bluestoreDiskSignature = 'bluestore block device' # 22 bytes long
+
+ # throws OSError on failure
+ logger.info("opening device {} to check for BlueStore label".format(device_path))
+ with open(device_path, "rb") as fd:
+ # read first 22 bytes looking for bluestore disk signature
+ signature = fd.read(22)
+ if signature.decode('ascii', 'replace') == bluestoreDiskSignature:
+ isBluestore = True
+
+ return isBluestore