From: Rongqi Sun
Date: Mon, 27 Feb 2023 06:36:46 +0000 (+0800)
Subject: ceph-volume: add seastore support.
X-Git-Tag: v19.0.0~1274^2~13
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=05f1ce0e997bc98dcd0e3596f556fc98a8b872f3;p=ceph.git

ceph-volume: add seastore support.

Add a --seastore flag to the lvm activate and batch parsers and to the
common lvm parser (common_parser), together with the
activate_seastore(), prepare_seastore() and osd_mkfs_seastore() helpers
that implement it. bluestore remains the default objectstore when no
objectstore flag is passed.

Signed-off-by: Rongqi Sun
---

diff --git a/src/ceph-volume/ceph_volume/devices/lvm/activate.py b/src/ceph-volume/ceph_volume/devices/lvm/activate.py
index 53ed6aa4791..aec269c0369 100644
--- a/src/ceph-volume/ceph_volume/devices/lvm/activate.py
+++ b/src/ceph-volume/ceph_volume/devices/lvm/activate.py
@@ -218,6 +218,51 @@ def activate_bluestore(osd_lvs, no_systemd=False, no_tmpfs=False):
         systemctl.start_osd(osd_id)
     terminal.success("ceph-volume lvm activate successful for osd ID: %s" % osd_id)
 
+def activate_seastore(osd_lvs, no_systemd=False, no_tmpfs=False):
+    for lv in osd_lvs:
+        if lv.tags.get('ceph.type') == 'block':
+            osd_block_lv = lv
+            break
+    else:
+        raise RuntimeError('could not find a seastore OSD to activate')
+
+    is_encrypted = osd_block_lv.tags.get('ceph.encrypted', '0') == '1'
+    dmcrypt_secret = None
+    osd_id = osd_block_lv.tags['ceph.osd_id']
+    conf.cluster = osd_block_lv.tags['ceph.cluster_name']
+    osd_fsid = osd_block_lv.tags['ceph.osd_fsid']
+    configuration.load_ceph_conf_path(osd_block_lv.tags['ceph.cluster_name'])
+    configuration.load()
+
+    # mount on tmpfs the osd directory
+    osd_path = '/var/lib/ceph/osd/%s-%s' % (conf.cluster, osd_id)
+    if not system.path_is_mounted(osd_path):
+        # mkdir -p and mount as tmpfs
+        prepare_utils.create_osd_path(osd_id, tmpfs=not no_tmpfs)
+    # encryption is handled here, before priming the OSD dir
+    if is_encrypted:
+        osd_lv_path = '/dev/mapper/%s' % osd_block_lv.lv_uuid
+        lockbox_secret = osd_block_lv.tags['ceph.cephx_lockbox_secret']
+        encryption_utils.write_lockbox_keyring(osd_id, osd_fsid, lockbox_secret)
+        dmcrypt_secret = encryption_utils.get_dmcrypt_key(osd_id, osd_fsid)
+        encryption_utils.luks_open(dmcrypt_secret, osd_block_lv.lv_path, osd_block_lv.lv_uuid)
+    else:
+        osd_lv_path = osd_block_lv.lv_path
+
+    # always re-do the symlink regardless if it exists, so that the block
+    # devices that may have changed can be mapped correctly every time
+    process.run(['ln', '-snf', osd_lv_path, os.path.join(osd_path, 'block')])
+    system.chown(os.path.join(osd_path, 'block'))
+    if no_systemd is False:
+        # enable the ceph-volume unit for this OSD
+        systemctl.enable_volume(osd_id, osd_fsid, 'lvm')
+
+        # enable the OSD
+        systemctl.enable_osd(osd_id)
+
+        # start the OSD
+        systemctl.start_osd(osd_id)
+    terminal.success("ceph-volume lvm activate successful for osd ID: %s" % osd_id)
 
 class Activate(object):
 
@@ -297,6 +342,8 @@ class Activate(object):
         # explicit filestore/bluestore flags take precedence
         if getattr(args, 'bluestore', False):
             activate_bluestore(lvs, args.no_systemd, getattr(args, 'no_tmpfs', False))
+        elif getattr(args, 'seastore', False):
+            activate_seastore(lvs, args.no_systemd, getattr(args, 'no_tmpfs', False))
         elif getattr(args, 'filestore', False):
             activate_filestore(lvs, args.no_systemd)
         elif any('ceph.block_device' in lv.tags for lv in lvs):
@@ -348,6 +395,11 @@ class Activate(object):
             action='store_true',
             help='force bluestore objectstore activation',
         )
+        parser.add_argument(
+            '--seastore',
+            action='store_true',
+            help='force seastore objectstore activation',
+        )
         parser.add_argument(
             '--filestore',
             action='store_true',
diff --git a/src/ceph-volume/ceph_volume/devices/lvm/batch.py b/src/ceph-volume/ceph_volume/devices/lvm/batch.py
index 90c4c22c407..9a9e9f4d4f7 100644
--- a/src/ceph-volume/ceph_volume/devices/lvm/batch.py
+++ b/src/ceph-volume/ceph_volume/devices/lvm/batch.py
@@ -237,6 +237,11 @@ class Batch(object):
             action='store_true',
             help='bluestore objectstore (default)',
         )
+        parser.add_argument(
+            '--seastore',
+            action='store_true',
+            help='seastore objectstore',
+        )
         parser.add_argument(
             '--filestore',
             action='store_true',
@@ -411,7 +416,7 @@ class Batch(object):
 
         # Default to bluestore here since defaulting it in add_argument may
         # cause both to be True
-        if not self.args.bluestore and not self.args.filestore:
+        if not self.args.bluestore and not self.args.filestore and not self.args.seastore:
             self.args.bluestore = True
 
         if (self.args.auto and not self.args.db_devices and not
@@ -444,6 +449,7 @@ class Batch(object):
         defaults = common.get_default_args()
         global_args = [
             'bluestore',
+            'seastore',
             'filestore',
             'dmcrypt',
             'crush_device_class',
@@ -464,6 +470,8 @@ class Batch(object):
         if args.bluestore:
             plan = self.get_deployment_layout(args, args.devices, args.db_devices,
                                               args.wal_devices)
+        elif args.seastore:
+            plan = self.get_deployment_layout(args, args.devices)
         elif args.filestore:
             plan = self.get_deployment_layout(args, args.devices, args.journal_devices)
         return plan
diff --git a/src/ceph-volume/ceph_volume/devices/lvm/common.py b/src/ceph-volume/ceph_volume/devices/lvm/common.py
index edc8e1cbce1..4b1762e2295 100644
--- a/src/ceph-volume/ceph_volume/devices/lvm/common.py
+++ b/src/ceph-volume/ceph_volume/devices/lvm/common.py
@@ -126,6 +126,13 @@ bluestore_args = {
     },
 }
 
+seastore_args = {
+    '--seastore': {
+        'action': 'store_true',
+        'help': 'Use the seastore objectstore',
+    },
+}
+
 filestore_args = {
     '--filestore': {
         'action': 'store_true',
@@ -152,7 +159,7 @@ def get_default_args():
     defaults = {}
     def format_name(name):
         return name.strip('-').replace('-', '_').replace('.', '_')
-    for argset in (common_args, filestore_args, bluestore_args):
+    for argset in (common_args, filestore_args, bluestore_args, seastore_args):
         defaults.update({format_name(name): val.get('default', None) for name, val in argset.items()})
     return defaults
 
@@ -170,6 +177,7 @@ def common_parser(prog, description):
 
     filestore_group = parser.add_argument_group('filestore')
     bluestore_group = parser.add_argument_group('bluestore')
+    seastore_group = parser.add_argument_group('seastore')
 
     for name, kwargs in common_args.items():
         parser.add_argument(name, **kwargs)
@@ -180,6 +188,9 @@ def common_parser(prog, description):
     for name, kwargs in filestore_args.items():
         filestore_group.add_argument(name, **kwargs)
 
+    for name, kwargs in seastore_args.items():
+        seastore_group.add_argument(name, **kwargs)
+
     # Do not parse args, so that consumers can do something before the args get
     # parsed triggering argparse behavior
     return parser
diff --git a/src/ceph-volume/ceph_volume/devices/lvm/create.py b/src/ceph-volume/ceph_volume/devices/lvm/create.py
index af2cd96c084..29175fbfc67 100644
--- a/src/ceph-volume/ceph_volume/devices/lvm/create.py
+++ b/src/ceph-volume/ceph_volume/devices/lvm/create.py
@@ -68,7 +68,7 @@ class Create(object):
         if len(self.argv) == 0:
             print(sub_command_help)
             return
-        exclude_group_options(parser, groups=['filestore', 'bluestore'], argv=self.argv)
+        exclude_group_options(parser, groups=['filestore', 'bluestore', 'seastore'], argv=self.argv)
         args = parser.parse_args(self.argv)
         # Default to bluestore here since defaulting it in add_argument may
         # cause both to be True
diff --git a/src/ceph-volume/ceph_volume/devices/lvm/prepare.py b/src/ceph-volume/ceph_volume/devices/lvm/prepare.py
index 2f715fdba12..3c53399cd68 100644
--- a/src/ceph-volume/ceph_volume/devices/lvm/prepare.py
+++ b/src/ceph-volume/ceph_volume/devices/lvm/prepare.py
@@ -119,6 +119,34 @@ def prepare_bluestore(block, wal, db, secrets, tags, osd_id, fsid):
         db=db
     )
 
+def prepare_seastore(block, secrets, tags, osd_id, fsid):
+    """
+    :param block: The name of the logical volume for the seastore data
+    :param secrets: A dict with the secrets needed to create the osd (e.g. cephx)
+    :param tags: The LV tags, only handed to prepare_dmcrypt when a dmcrypt key is present
+    :param osd_id: The OSD id
+    :param fsid: The OSD fsid, also known as the OSD UUID
+    """
+    cephx_secret = secrets.get('cephx_secret', prepare_utils.create_key())
+    # encryption-only operations
+    if secrets.get('dmcrypt_key'):
+        key = secrets['dmcrypt_key']
+        block = prepare_dmcrypt(key, block, 'block', tags)
+
+    # create the directory
+    prepare_utils.create_osd_path(osd_id, tmpfs=True)
+    # symlink the block
+    prepare_utils.link_block(block, osd_id)
+    # get the latest monmap
+    prepare_utils.get_monmap(osd_id)
+    # write the OSD keyring if it doesn't exist already
+    prepare_utils.write_keyring(osd_id, cephx_secret)
+    # prepare the osd filesystem
+    prepare_utils.osd_mkfs_seastore(
+        osd_id, fsid,
+        keyring=cephx_secret,
+    )
+
 
 class Prepare(object):
 
@@ -393,6 +421,32 @@ class Prepare(object):
                 self.osd_id,
                 osd_fsid,
             )
+        elif self.args.seastore:
+            try:
+                vg_name, lv_name = self.args.data.split('/')
+                block_lv = api.get_single_lv(filters={'lv_name': lv_name,
+                                                      'vg_name': vg_name})
+            except ValueError:
+                block_lv = None
+
+            if not block_lv:
+                block_lv = self.prepare_data_device('block', osd_fsid)
+
+            tags['ceph.block_device'] = block_lv.lv_path
+            tags['ceph.block_uuid'] = block_lv.lv_uuid
+            tags['ceph.cephx_lockbox_secret'] = cephx_lockbox_secret
+            tags['ceph.encrypted'] = encrypted
+            tags['ceph.vdo'] = api.is_vdo(block_lv.lv_path)
+            tags['ceph.type'] = 'block'
+            block_lv.set_tags(tags)
+
+            prepare_seastore(
+                block_lv.lv_path,
+                secrets,
+                tags,
+                self.osd_id,
+                osd_fsid,
+            )
 
     def main(self):
         sub_command_help = dedent("""
@@ -427,7 +481,7 @@ class Prepare(object):
         if len(self.argv) == 0:
             print(sub_command_help)
             return
-        exclude_group_options(parser, argv=self.argv, groups=['filestore', 'bluestore'])
+        exclude_group_options(parser, argv=self.argv, groups=['filestore', 'bluestore', 'seastore'])
         self.args = parser.parse_args(self.argv)
         # the unfortunate mix of one superset for both filestore and bluestore
         # makes this validation cumbersome
@@ -436,6 +490,6 @@ class Prepare(object):
                 raise SystemExit('--journal is required when using --filestore')
         # Default to bluestore here since defaulting it in add_argument may
         # cause both to be True
-        if not self.args.bluestore and not self.args.filestore:
+        if not self.args.bluestore and not self.args.filestore and not self.args.seastore:
             self.args.bluestore = True
         self.safe_prepare()
diff --git a/src/ceph-volume/ceph_volume/util/prepare.py b/src/ceph-volume/ceph_volume/util/prepare.py
index ff7427eedd2..6d4fc883736 100644
--- a/src/ceph-volume/ceph_volume/util/prepare.py
+++ b/src/ceph-volume/ceph_volume/util/prepare.py
@@ -484,6 +484,72 @@ def osd_mkfs_bluestore(osd_id, fsid, keyring=None, wal=False, db=False):
                 raise RuntimeError('Command failed with exit code %s: %s' % (returncode, ' '.join(command)))
 
 
+def osd_mkfs_seastore(osd_id, fsid, keyring=None):
+    """
+    Create the files for the OSD to function. A normal call will look like:
+
+          ceph-osd --cluster ceph --mkfs --mkkey -i 0 \
+                   --monmap /var/lib/ceph/osd/ceph-0/activate.monmap \
+                   --osd-data /var/lib/ceph/osd/ceph-0 \
+                   --osd-uuid 8d208665-89ae-4733-8888-5d3bfbeeec6c \
+                   --keyring /var/lib/ceph/osd/ceph-0/keyring \
+                   --setuser ceph --setgroup ceph
+
+    In some cases it is required to use the keyring, when it is passed in as
+    a keyword argument it is used as part of the ceph-osd command
+    """
+    path = '/var/lib/ceph/osd/%s-%s/' % (conf.cluster, osd_id)
+    monmap = os.path.join(path, 'activate.monmap')
+
+    system.chown(path)
+
+    base_command = [
+        'ceph-osd',
+        '--cluster', conf.cluster,
+        '--osd-objectstore', 'seastore',
+        '--mkfs',
+        '--key', os.path.join(path, 'keyring'),
+        '-i', osd_id,
+        '--monmap', monmap,
+    ]
+
+    supplementary_command = [
+        '--osd-data', path,
+        '--osd-uuid', fsid,
+        '--setuser', 'ceph',
+        '--setgroup', 'ceph'
+    ]
+
+    if keyring is not None:
+        base_command.extend(['--keyfile', '-'])
+
+    if get_osdspec_affinity():
+        base_command.extend(['--osdspec-affinity', get_osdspec_affinity()])
+
+    command = base_command + supplementary_command
+
+    """
+    When running in containers the --mkfs on raw device sometimes fails
+    to acquire a lock through flock() on the device because systemd-udevd holds one temporarily.
+    See KernelDevice.cc and _lock() to understand how ceph-osd acquires the lock.
+    Because this is really transient, we retry up to 5 times and wait for 1 sec in-between
+    """
+    for retry in range(5):
+        _, _, returncode = process.call(command, stdin=keyring, terminal_verbose=True, show_command=True)
+        if returncode == 0:
+            break
+        else:
+            if returncode == errno.EWOULDBLOCK:
+                time.sleep(1)
+                logger.info('disk is held by another process, trying to mkfs again... (%s/5 attempt)' % retry)
+                continue
+            else:
+                raise RuntimeError('Command failed with exit code %s: %s' % (returncode, ' '.join(command)))
+    else:
+        # every attempt hit EWOULDBLOCK: fail loudly instead of returning as if mkfs had worked
+        raise RuntimeError('Command failed with exit code %s: %s' % (returncode, ' '.join(command)))
+
+
 def osd_mkfs_filestore(osd_id, fsid, keyring):
     """
     Create the files for the OSD to function. A normal call will look like: