]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
ceph-volume: add seastore support.
author Rongqi Sun <sunrongqi@huawei.com>
Mon, 27 Feb 2023 06:36:46 +0000 (14:36 +0800)
committer Rongqi Sun <sunrongqi@huawei.com>
Mon, 27 Feb 2023 06:36:46 +0000 (14:36 +0800)
Signed-off-by: Rongqi Sun <sunrongqi@huawei.com>
src/ceph-volume/ceph_volume/devices/lvm/activate.py
src/ceph-volume/ceph_volume/devices/lvm/batch.py
src/ceph-volume/ceph_volume/devices/lvm/common.py
src/ceph-volume/ceph_volume/devices/lvm/create.py
src/ceph-volume/ceph_volume/devices/lvm/prepare.py
src/ceph-volume/ceph_volume/util/prepare.py

index 53ed6aa47918e786ae6109b3299cf26f60cec444..aec269c036913cf432790c5a030f6944d73e9221 100644 (file)
@@ -218,6 +218,51 @@ def activate_bluestore(osd_lvs, no_systemd=False, no_tmpfs=False):
         systemctl.start_osd(osd_id)
     terminal.success("ceph-volume lvm activate successful for osd ID: %s" % osd_id)
 
+def activate_seastore(osd_lvs, no_systemd=False, no_tmpfs=False):
+    """
+    Activate a seastore OSD from its logical volumes.
+
+    :param osd_lvs: Logical volumes of the OSD; the first one tagged with
+                    ``ceph.type=block`` is the one that gets activated
+    :param no_systemd: Unless this is ``False`` the systemd units are neither
+                       enabled nor started
+    :param no_tmpfs: When true the OSD directory is not mounted as tmpfs
+    :raises RuntimeError: If no volume is tagged as ``block``
+    """
+    osd_block_lv = next(
+        (lv for lv in osd_lvs if lv.tags.get('ceph.type') == 'block'),
+        None,
+    )
+    if osd_block_lv is None:
+        raise RuntimeError('could not find a seastore OSD to activate')
+
+    tags = osd_block_lv.tags
+    is_encrypted = tags.get('ceph.encrypted', '0') == '1'
+    osd_id = tags['ceph.osd_id']
+    cluster_name = tags['ceph.cluster_name']
+    conf.cluster = cluster_name
+    osd_fsid = tags['ceph.osd_fsid']
+    configuration.load_ceph_conf_path(cluster_name)
+    configuration.load()
+
+    # make sure the OSD directory exists (tmpfs unless told otherwise)
+    osd_path = '/var/lib/ceph/osd/%s-%s' % (conf.cluster, osd_id)
+    if not system.path_is_mounted(osd_path):
+        prepare_utils.create_osd_path(osd_id, tmpfs=not no_tmpfs)
+
+    # an encrypted volume has to be opened first, the link created below
+    # must then point at the mapper device instead of the plain LV
+    osd_lv_path = osd_block_lv.lv_path
+    if is_encrypted:
+        lockbox_secret = tags['ceph.cephx_lockbox_secret']
+        encryption_utils.write_lockbox_keyring(osd_id, osd_fsid, lockbox_secret)
+        dmcrypt_secret = encryption_utils.get_dmcrypt_key(osd_id, osd_fsid)
+        encryption_utils.luks_open(dmcrypt_secret, osd_block_lv.lv_path, osd_block_lv.lv_uuid)
+        osd_lv_path = '/dev/mapper/%s' % osd_block_lv.lv_uuid
+
+    # the link is recreated unconditionally: the backing device may have
+    # changed since the last activation
+    block_link = os.path.join(osd_path, 'block')
+    process.run(['ln', '-snf', osd_lv_path, block_link])
+    system.chown(block_link)
+
+    if no_systemd is False:
+        # ceph-volume unit first, then enable and start the OSD itself
+        systemctl.enable_volume(osd_id, osd_fsid, 'lvm')
+        systemctl.enable_osd(osd_id)
+        systemctl.start_osd(osd_id)
+    terminal.success("ceph-volume lvm activate successful for osd ID: %s" % osd_id)
 
 class Activate(object):
 
@@ -297,6 +342,8 @@ class Activate(object):
         # explicit filestore/bluestore flags take precedence
         if getattr(args, 'bluestore', False):
             activate_bluestore(lvs, args.no_systemd, getattr(args, 'no_tmpfs', False))
+        elif getattr(args, 'seastore', False):
+            activate_seastore(lvs, args.no_systemd, getattr(args, 'no_tmpfs', False))
         elif getattr(args, 'filestore', False):
             activate_filestore(lvs, args.no_systemd)
         elif any('ceph.block_device' in lv.tags for lv in lvs):
@@ -348,6 +395,11 @@ class Activate(object):
             action='store_true',
             help='force bluestore objectstore activation',
         )
+        parser.add_argument(
+            '--seastore',
+            action='store_true',
+            help='force seastore objectstore activation',
+        )
         parser.add_argument(
             '--filestore',
             action='store_true',
index 90c4c22c407aadf5289d7172ed5b423cdb9938e1..9a9e9f4d4f741e038cedb23ee257ec724f4d4e08 100644 (file)
@@ -237,6 +237,11 @@ class Batch(object):
             action='store_true',
             help='bluestore objectstore (default)',
         )
+        # seastore is opt-in: bluestore is what gets selected when no
+        # objectstore flag is given, so the help must not claim "default"
+        parser.add_argument(
+            '--seastore',
+            action='store_true',
+            help='seastore objectstore',
+        )
         parser.add_argument(
             '--filestore',
             action='store_true',
@@ -411,7 +416,7 @@ class Batch(object):
 
         # Default to bluestore here since defaulting it in add_argument may
         # cause both to be True
-        if not self.args.bluestore and not self.args.filestore:
+        if not self.args.bluestore and not self.args.filestore and not self.args.seastore:
             self.args.bluestore = True
 
         if (self.args.auto and not self.args.db_devices and not
@@ -444,6 +449,7 @@ class Batch(object):
         defaults = common.get_default_args()
         global_args = [
             'bluestore',
+            'seastore',
             'filestore',
             'dmcrypt',
             'crush_device_class',
@@ -464,6 +470,8 @@ class Batch(object):
         if args.bluestore:
             plan = self.get_deployment_layout(args, args.devices, args.db_devices,
                                               args.wal_devices)
+        elif args.seastore:
+            plan = self.get_deployment_layout(args, args.devices)
         elif args.filestore:
             plan = self.get_deployment_layout(args, args.devices, args.journal_devices)
         return plan
index edc8e1cbce117c377b60bf36d97b04090b9072e5..4b1762e229586c1657c967d4557df4d0bca47e75 100644 (file)
@@ -126,6 +126,13 @@ bluestore_args = {
     },
 }
 
+# Command line options for the seastore objectstore, keyed by flag name.
+# Each value holds the keyword arguments handed to ``add_argument()`` when
+# the flag is registered in the 'seastore' argument group.
+seastore_args = {
+    '--seastore': {
+        'action': 'store_true',
+        'help': 'Use the seastore objectstore',
+    },
+}
+
 filestore_args = {
     '--filestore': {
         'action': 'store_true',
@@ -152,7 +159,7 @@ def get_default_args():
     defaults = {}
     def format_name(name):
         return name.strip('-').replace('-', '_').replace('.', '_')
-    for argset in (common_args, filestore_args, bluestore_args):
+    for argset in (common_args, filestore_args, bluestore_args, seastore_args):
         defaults.update({format_name(name): val.get('default', None) for name, val in argset.items()})
     return defaults
 
@@ -170,6 +177,7 @@ def common_parser(prog, description):
 
     filestore_group = parser.add_argument_group('filestore')
     bluestore_group = parser.add_argument_group('bluestore')
+    seastore_group = parser.add_argument_group('seastore')
 
     for name, kwargs in common_args.items():
         parser.add_argument(name, **kwargs)
@@ -180,6 +188,9 @@ def common_parser(prog, description):
     for name, kwargs in filestore_args.items():
         filestore_group.add_argument(name, **kwargs)
 
+    for name, kwargs in seastore_args.items():
+        seastore_group.add_argument(name, **kwargs)
+
     # Do not parse args, so that consumers can do something before the args get
     # parsed triggering argparse behavior
     return parser
index af2cd96c0845ea93a0a5e141ce353bb5d00396a1..29175fbfc673588e1f83d0e3e1fa74cff4dcaaf5 100644 (file)
@@ -68,7 +68,7 @@ class Create(object):
         if len(self.argv) == 0:
             print(sub_command_help)
             return
-        exclude_group_options(parser, groups=['filestore', 'bluestore'], argv=self.argv)
+        exclude_group_options(parser, groups=['filestore', 'bluestore', 'seastore'], argv=self.argv)
         args = parser.parse_args(self.argv)
         # Default to bluestore here since defaulting it in add_argument may
         # cause both to be True
index 2f715fdba122c8a87fc2a097a34df4c66c0766a1..3c53399cd68ac5417861455e78e773437644ea37 100644 (file)
@@ -119,6 +119,33 @@ def prepare_bluestore(block, wal, db, secrets, tags, osd_id, fsid):
         db=db
     )
 
+def prepare_seastore(block, secrets, tags, osd_id, fsid):
+    """
+    Set up the OSD directory for a seastore OSD and run mkfs on it.
+
+    :param block: The name of the logical volume for the seastore data
+    :param secrets: A dict with the secrets needed to create the osd (e.g. cephx)
+    :param tags: Tags of the block volume, handed to ``prepare_dmcrypt`` when
+                 the OSD is encrypted
+    :param osd_id: The OSD id
+    :param fsid: The OSD fsid, also known as the OSD UUID
+    """
+    # only generate a fresh key when the caller did not bring one; a
+    # ``secrets.get(key, create_key())`` would call create_key() every time
+    if 'cephx_secret' in secrets:
+        cephx_secret = secrets['cephx_secret']
+    else:
+        cephx_secret = prepare_utils.create_key()
+
+    # encryption-only operations: from here on ``block`` is whatever
+    # prepare_dmcrypt returns for the encrypted volume
+    if secrets.get('dmcrypt_key'):
+        key = secrets['dmcrypt_key']
+        block = prepare_dmcrypt(key, block, 'block', tags)
+
+    # create the directory
+    prepare_utils.create_osd_path(osd_id, tmpfs=True)
+    # symlink the block
+    prepare_utils.link_block(block, osd_id)
+    # get the latest monmap
+    prepare_utils.get_monmap(osd_id)
+    # write the OSD keyring if it doesn't exist already
+    prepare_utils.write_keyring(osd_id, cephx_secret)
+    # prepare the osd filesystem
+    prepare_utils.osd_mkfs_seastore(
+        osd_id, fsid,
+        keyring=cephx_secret,
+    )
+
 
 class Prepare(object):
 
@@ -393,6 +420,32 @@ class Prepare(object):
                 self.osd_id,
                 osd_fsid,
             )
+        elif self.args.seastore:
+            try:
+                vg_name, lv_name = self.args.data.split('/')
+                block_lv = api.get_single_lv(filters={'lv_name': lv_name,
+                                                      'vg_name': vg_name})
+            except ValueError:
+                block_lv = None
+
+            if not block_lv:
+                block_lv = self.prepare_data_device('block', osd_fsid)
+
+            tags['ceph.block_device'] = block_lv.lv_path
+            tags['ceph.block_uuid'] = block_lv.lv_uuid
+            tags['ceph.cephx_lockbox_secret'] = cephx_lockbox_secret
+            tags['ceph.encrypted'] = encrypted
+            tags['ceph.vdo'] = api.is_vdo(block_lv.lv_path)
+            tags['ceph.type'] = 'block'
+            block_lv.set_tags(tags)
+
+            prepare_seastore(
+                block_lv.lv_path,
+                secrets,
+                tags,
+                self.osd_id,
+                osd_fsid,
+            )
 
     def main(self):
         sub_command_help = dedent("""
@@ -427,7 +480,7 @@ class Prepare(object):
         if len(self.argv) == 0:
             print(sub_command_help)
             return
-        exclude_group_options(parser, argv=self.argv, groups=['filestore', 'bluestore'])
+        exclude_group_options(parser, argv=self.argv, groups=['filestore', 'bluestore', 'seastore'])
         self.args = parser.parse_args(self.argv)
         # the unfortunate mix of one superset for both filestore and bluestore
         # makes this validation cumbersome
@@ -436,6 +489,6 @@ class Prepare(object):
                 raise SystemExit('--journal is required when using --filestore')
         # Default to bluestore here since defaulting it in add_argument may
         # cause both to be True
-        if not self.args.bluestore and not self.args.filestore:
+        if not self.args.bluestore and not self.args.filestore and not self.args.seastore:
             self.args.bluestore = True
         self.safe_prepare()
index ff7427eedd207bd7782655bc7be147ea71184012..6d4fc88373634039684dab888144f01516ea4252 100644 (file)
@@ -484,6 +484,69 @@ def osd_mkfs_bluestore(osd_id, fsid, keyring=None, wal=False, db=False):
                 raise RuntimeError('Command failed with exit code %s: %s' % (returncode, ' '.join(command)))
 
 
+def osd_mkfs_seastore(osd_id, fsid, keyring=None):
+    """
+    Create the files for a seastore OSD to function. With a keyring passed
+    in, the call looks like::
+
+          ceph-osd --cluster ceph --osd-objectstore seastore --mkfs
+                   --key /var/lib/ceph/osd/ceph-0/keyring -i 0
+                   --monmap /var/lib/ceph/osd/ceph-0/activate.monmap
+                   --keyfile -
+                   --osd-data /var/lib/ceph/osd/ceph-0/
+                   --osd-uuid 8d208665-89ae-4733-8888-5d3bfbeeec6c
+                   --setuser ceph --setgroup ceph
+
+    :param osd_id: The OSD id
+    :param fsid: The OSD fsid, also known as the OSD UUID
+    :param keyring: Optional secret; when given it is fed to ceph-osd on
+                    stdin and ``--keyfile -`` is added to the command
+    :raises RuntimeError: If ceph-osd exits with an error, or if the device
+                          is still locked after the last retry
+    """
+    max_attempts = 5
+    path = '/var/lib/ceph/osd/%s-%s/' % (conf.cluster, osd_id)
+    monmap = os.path.join(path, 'activate.monmap')
+    # built from ``path`` so that a cluster name other than "ceph" is honored
+    # NOTE(review): ``--key`` is given a file path here - confirm that this is
+    # what ceph-osd expects rather than ``--keyring``
+    keyring_path = os.path.join(path, 'keyring')
+
+    system.chown(path)
+
+    base_command = [
+        'ceph-osd',
+        '--cluster', conf.cluster,
+        '--osd-objectstore', 'seastore',
+        '--mkfs',
+        '--key', keyring_path,
+        '-i', osd_id,
+        '--monmap', monmap,
+    ]
+
+    supplementary_command = [
+        '--osd-data', path,
+        '--osd-uuid', fsid,
+        '--setuser', 'ceph',
+        '--setgroup', 'ceph'
+    ]
+
+    if keyring is not None:
+        base_command.extend(['--keyfile', '-'])
+
+    osdspec_affinity = get_osdspec_affinity()
+    if osdspec_affinity:
+        base_command.extend(['--osdspec-affinity', osdspec_affinity])
+
+    command = base_command + supplementary_command
+
+    # When running in containers the --mkfs on raw device sometimes fails
+    # to acquire a lock through flock() on the device because systemd-udevd
+    # holds one temporarily. See KernelDevice.cc and _lock() to understand how
+    # ceph-osd acquires the lock. Because this is really transient, we retry
+    # up to 5 times and wait for 1 sec in-between
+    for attempt in range(1, max_attempts + 1):
+        _, _, returncode = process.call(command, stdin=keyring, terminal_verbose=True, show_command=True)
+        if returncode == 0:
+            return
+        if returncode != errno.EWOULDBLOCK:
+            raise RuntimeError('Command failed with exit code %s: %s' % (returncode, ' '.join(command)))
+        if attempt < max_attempts:
+            logger.info('disk is held by another process, trying to mkfs again... (%s/%s attempt)' % (attempt, max_attempts))
+            time.sleep(1)
+
+    # every attempt found the device locked: mkfs never ran successfully, so
+    # this must not be reported as success
+    raise RuntimeError('Command failed with exit code %s: %s' % (returncode, ' '.join(command)))
+
+
 def osd_mkfs_filestore(osd_id, fsid, keyring):
     """
     Create the files for the OSD to function. A normal call will look like: