git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
mgr/cephadm: rotate keyring for core ceph daemons during upgrade
author: Adam King <adking@redhat.com>
Wed, 14 May 2025 17:16:43 +0000 (13:16 -0400)
committer: Patrick Donnelly <pdonnell@ibm.com>
Mon, 26 Jan 2026 15:28:00 +0000 (10:28 -0500)
Specifically, this causes us to rotate the mgr, mon, OSD,
and mds keyrings. The mgr and mon keyrings are rotated as soon
as we see that all the mons have been upgraded, and the OSD/mds
keyrings are rotated when we reach those daemons in the upgrade order.

Signed-off-by: Adam King <adking@redhat.com>
src/cephadm/cephadm.py
src/cephadm/cephadmlib/daemons/ceph.py
src/pybind/mgr/cephadm/module.py
src/pybind/mgr/cephadm/serve.py
src/pybind/mgr/cephadm/services/osd.py
src/pybind/mgr/cephadm/upgrade.py

index 4e255e551656bf9e793c3bfa7916937c0730adf9..dba0c85a290e8506ea02a37b3ea4bfd395772d93 100755 (executable)
@@ -180,6 +180,7 @@ from cephadmlib.daemons import (
     Keepalived,
     Monitoring,
     NFSGanesha,
+    OSD,
     SMB,
     SNMPGateway,
     MgmtGateway,
@@ -621,8 +622,28 @@ def create_daemon_dirs(
 
     if keyring:
         keyring_path = os.path.join(data_dir, 'keyring')
+        config_json = fetch_configs(ctx)
+        key_path_exists = False
+        key_path_content = 'N/A'
+        try:
+            key_path_exists = os.path.exists(keyring_path)
+            key_path_content = open(keyring_path, 'r').read()
+        except Exception:
+            pass
+        update_bluestore_label_osd_keyring = False
+        if (
+            ident.daemon_type == 'osd'
+            and key_path_exists
+            and key_path_content != keyring
+        ):
+            # need to update keyring with ceph-bluestore-tool
+            update_bluestore_label_osd_keyring = True
         with write_new(keyring_path, owner=(uid, gid)) as f:
             f.write(keyring)
+        if update_bluestore_label_osd_keyring:
+            osd_daemon_form = OSD.create(ctx, ident)
+            # osd_daemon_form = OSD.init(ctx, ctx.fsid, ident.daemon_id)
+            osd_daemon_form.rotate_osd_lv_keyring(ctx, keyring_path)
 
     if daemon_type in Monitoring.components.keys():
         config_json = fetch_configs(ctx)
@@ -4555,6 +4576,11 @@ def _add_deploy_parser_args(
         default=None,
         help='Time in seconds to wait for graceful service shutdown before forcefully killing it'
     )
+    parser_deploy.add_argument(
+        '--osd-dm-crypt-key',
+        default=None,
+        help="dm-crypt key for OSD, needed for deployment if OSD's cephx keyring has been rotated"
+    )
 
 
 def _name_opts(parser: argparse.ArgumentParser) -> None:
index ac16bbebcfcc9525998e8d8733b389a1ed6ccd72..4a27e5bbc0c852f34102e9b16b1b107ec7438c6e 100644 (file)
@@ -1,5 +1,6 @@
 import logging
 import os
+import json
 
 from typing import Any, Dict, List, Optional, Tuple, Union
 
@@ -12,6 +13,7 @@ from ..context_getters import (
 )
 from ..daemon_form import register as register_daemon_form
 from ..daemon_identity import DaemonIdentity
+from ..call_wrappers import call, CallVerbosity
 from ..constants import DEFAULT_IMAGE
 from ..context import CephadmContext
 from ..deployment_utils import to_deployment_container
@@ -22,7 +24,9 @@ from ..file_utils import (
     pathify,
     populate_files,
     makedirs,
+    read_file,
     recursive_chown,
+    write_new,
 )
 from ..data_utils import dict_get
 from ..host_facts import HostFacts
@@ -304,9 +308,11 @@ class OSD(Ceph):
         ctx: CephadmContext,
         ident: DaemonIdentity,
         osd_fsid: Optional[str] = None,
+        osd_dm_crypt_key: Optional[str] = None,
     ) -> None:
         super().__init__(ctx, ident)
         self._osd_fsid = osd_fsid
+        self._osd_dm_crypt_key: str = osd_dm_crypt_key or ''
 
     @classmethod
     def create(cls, ctx: CephadmContext, ident: DaemonIdentity) -> 'OSD':
@@ -315,7 +321,8 @@ class OSD(Ceph):
             logger.info(
                 'Creating an OSD daemon form without an OSD FSID value'
             )
-        return cls(ctx, ident, osd_fsid)
+        osd_dm_crypt_key = getattr(ctx, 'osd_dm_crypt_key', None)
+        return cls(ctx, ident, osd_fsid, osd_dm_crypt_key)
 
     @staticmethod
     def get_sysctl_settings() -> List[str]:
@@ -332,6 +339,181 @@ class OSD(Ceph):
     def osd_fsid(self) -> Optional[str]:
         return self._osd_fsid
 
+    def rotate_osd_lv_keyring(
+        self, ctx: CephadmContext, keyring_path: str
+    ) -> None:
+        keyring_content = read_file([keyring_path])
+        if not keyring_content or keyring_content == 'Unknown':
+            raise Error(
+                f'Failed to find OSD keyring content at expected path: {keyring_path}'
+            )
+        actual_keyring = keyring_content
+        # if our keyring is the full thing with sections and caps, we don't want that
+        # just the actual keyring itself
+        try:
+            actual_keyring = (
+                keyring_content.split('key =', 1)[1]
+                .split('caps', 1)[0]
+                .strip()
+            )
+        except IndexError:
+            logger.error(
+                f'Failed to parse keyring from {keyring_content} for rotation of key for osd.{self.identity.daemon_id}'
+            )
+        c_v_container = CephContainer(
+            ctx,
+            image=ctx.image,
+            privileged=True,
+            entrypoint='ceph-volume',
+            args=[
+                'lvm',
+                'list',
+                str(self.identity.daemon_id),
+                '--format',
+                'json',
+            ],
+            volume_mounts=get_ceph_mounts_for_type(
+                ctx, ctx.fsid, 'ceph-volume'
+            ),
+        )
+        out, err, ret = call(
+            ctx,
+            c_v_container.run_cmd(),
+            verbosity=CallVerbosity.QUIET_UNLESS_ERROR,
+        )
+        if ret:
+            raise Error(
+                f'Got error using ceph-volume lvm list to get lv path for osd.{self.identity.daemon_id}\n'
+                f'Out:{out}\n'
+                f'Err:{err}'
+            )
+        osd_bluestore_data = json.loads(out)
+        lv_path = ''
+        encrypted = False
+        osd_lv_data = osd_bluestore_data.get(self.identity.daemon_id, [])
+        if osd_lv_data:
+            for dev in osd_lv_data:
+                if dev.get('type') == 'block':
+                    lv_path = dev.get('lv_path', '')
+                    encrypted = (
+                        dev.get('tags', {}).get('ceph.encrypted', '0') == '1'
+                    )
+        if not lv_path:
+            raise Error(
+                f'Failed to find lv path for osd with id "{self.identity.daemon_id}". Key not rotated using ceph-bluestore-tool'
+            )
+        if encrypted and not self._osd_dm_crypt_key:
+            raise Error(
+                f'Cannot rotate keyring for encrypted osd with id "{self.identity.daemon_id}" without dm-crypt key.'
+                'Key not rotated using ceph-bluestore-tool'
+            )
+        dev_path = lv_path
+
+        # OSD must be stopped to make changes to bluestore labels
+        logger.info(
+            f'Stopping osd.{self.identity.daemon_id} to update osd_key bluestore label'
+        )
+        call(
+            ctx,
+            ['systemctl', 'stop', self.identity.unit_name],
+            verbosity=CallVerbosity.QUIET_UNLESS_ERROR,
+        )
+
+        if encrypted:
+            dev_path = f'/dev/mapper/tmp_open_osd_{self._osd_fsid}'
+            cryptsetup_action = """#!/bin/bash
+DM_CRYPT_KEY=%s
+LV_PATH=%s
+DEV_NAME=%s
+
+echo "$DM_CRYPT_KEY" | cryptsetup luksOpen $LV_PATH $DEV_NAME
+""" % (
+                self._osd_dm_crypt_key,
+                lv_path,
+                dev_path.split('/')[-1],
+            )
+            helper_script_path = (
+                f'/tmp/cephadm-osd-{self.identity.daemon_id}-rotate-helper.sh'
+            )
+            with write_new(helper_script_path, perms=0o700) as f:
+                f.write(cryptsetup_action)
+            cryptsetup_open_container = CephContainer(
+                ctx,
+                image=ctx.image,
+                privileged=True,
+                entrypoint='/tmp/cryptsetup_action.sh',
+                volume_mounts={
+                    '/dev': '/dev',
+                    helper_script_path: '/tmp/cryptsetup_action.sh',
+                },
+            )
+            out, err, ret = call(
+                ctx,
+                cryptsetup_open_container.run_cmd(),
+                verbosity=CallVerbosity.QUIET_UNLESS_ERROR,
+            )
+            os.remove(helper_script_path)
+            if ret:
+                raise Error(
+                    'Got error rotating osd keyring while using cryptsetup tool\n'
+                    f'Out:{out}\n'
+                    f'Err:{err}'
+                )
+
+        bluestore_tool_container = CephContainer(
+            ctx,
+            image=ctx.image,
+            privileged=True,
+            entrypoint='ceph-bluestore-tool',
+            args=[
+                '--dev',
+                dev_path,
+                'set-label-key',
+                '-k',
+                'osd_key',
+                '-v',
+                actual_keyring,
+            ],
+            volume_mounts={'/dev': '/dev'},
+        )
+        out, err, ret = call(
+            ctx,
+            bluestore_tool_container.run_cmd(),
+            verbosity=CallVerbosity.QUIET_UNLESS_ERROR,
+        )
+        if ret:
+            raise Error(
+                'Got error rotating osd keyring using ceph-bluestore-tool\n'
+                f'Out:{out}\n'
+                f'Err:{err}'
+            )
+
+        if encrypted:
+            cryptsetup_close_container = CephContainer(
+                ctx,
+                image=ctx.image,
+                privileged=True,
+                entrypoint='cryptsetup',
+                args=[
+                    'luksClose',
+                    dev_path,
+                ],
+                volume_mounts={
+                    '/dev': '/dev',
+                },
+            )
+            out, err, ret = call(
+                ctx,
+                cryptsetup_close_container.run_cmd(),
+                verbosity=CallVerbosity.QUIET_UNLESS_ERROR,
+            )
+            if ret:
+                raise Error(
+                    'Got error rotating osd keyring while using cryptsetup tool\n'
+                    f'Out:{out}\n'
+                    f'Err:{err}'
+                )
+
 
 @register_daemon_form
 class CephExporter(ContainerDaemonForm):
index aec32f1fb7b0c664354bcfde985ff249225c7a43..b9abd99a4fd1bb6e988a13928cd69b091eb5d7f4 100644 (file)
@@ -2619,6 +2619,18 @@ Then run the following:
 
         return self.perform_service_action(action, service_name)
 
+    def key_rotate(self, daemon_spec: CephadmDaemonDeploySpec) -> None:
+        rc, out, err = self.mon_command({
+            'prefix': 'auth rotate',
+            'entity': daemon_spec.entity_name(),
+            'format': 'json',
+        })
+        if rc:
+            raise OrchestratorError(
+                f'Failed to rotate daemon key for {daemon_spec.entity_name()}.\n'
+                f'Rc: {rc}\nOut: {out}\nErr: {err}'
+            )
+
     def _rotate_daemon_key(self, daemon_spec: CephadmDaemonDeploySpec) -> str:
         self.log.info(f'Rotating authentication key for {daemon_spec.name()}')
         rc, out, err = self.mon_command({
index 3f1c63bcda114821d6aecda836a2ebfbd1dcb017..815d5ad13b3e8c23c103f44881c7b06edeb0c2c3 100644 (file)
@@ -1429,6 +1429,15 @@ class CephadmServe:
                     if not osd_uuid:
                         raise OrchestratorError('osd.%s not in osdmap' % daemon_spec.daemon_id)
                     daemon_params['osd_fsid'] = osd_uuid
+                    # we may need a dm-crypt key to rotate this OSD's keyring
+                    # if it is encrypted. If it is not encrypted, no such
+                    # key will exist
+                    rc, ckg_out, ckg_err = self.mgr.mon_command({
+                        'prefix': 'config-key get',
+                        'key': f'dm-crypt/osd/{osd_uuid}/luks',
+                    })
+                    if not rc and ckg_out:
+                        daemon_params['osd_dm_crypt_key'] = ckg_out
 
                 if reconfig:
                     daemon_params['reconfig'] = True
index 60a399149f9f9377ef77b34409f50ef656bdb9a8..31039b9b25bfba5e55acd4f2e43b76be914363ac 100644 (file)
@@ -40,10 +40,6 @@ class OSDService(CephService):
 
         async def create_from_spec_one(host: str, drive_selection: DriveSelection) -> Optional[str]:
             # skip this host if there has been no change in inventory
-            if not self.mgr.cache.osdspec_needs_apply(host, drive_group):
-                self.mgr.log.debug("skipping apply of %s on %s (no change)" % (
-                    host, drive_group))
-                return None
             # skip this host if we cannot schedule here
             if self.mgr.inventory.has_label(host, SpecialHostLabels.DRAIN_DAEMONS):
                 return None
index 26396f7f93d7e917c9b61355037871c2d76bf4ef..c0e7c63cd786781453b3869e673e004db54a102a 100644 (file)
@@ -71,6 +71,7 @@ class UpgradeState:
                  services: Optional[List[str]] = None,
                  total_count: Optional[int] = None,
                  remaining_count: Optional[int] = None,
+                 rotated_mgr_mon_auth_key_daemons: Optional[List[str]] = None,
                  ):
         self._target_name: str = target_name  # Use CephadmUpgrade.target_image instead.
         self.progress_id: str = progress_id
@@ -88,6 +89,7 @@ class UpgradeState:
         self.services = services
         self.total_count = total_count
         self.remaining_count = remaining_count
+        self.rotated_mgr_mon_auth_key_daemons = rotated_mgr_mon_auth_key_daemons
 
     def to_json(self) -> dict:
         return {
@@ -106,6 +108,7 @@ class UpgradeState:
             'services': self.services,
             'total_count': self.total_count,
             'remaining_count': self.remaining_count,
+            'rotated_mgr_mon_auth_key_daemons': self.rotated_mgr_mon_auth_key_daemons,
         }
 
     @classmethod
@@ -127,7 +130,8 @@ class CephadmUpgrade:
         'UPGRADE_REDEPLOY_DAEMON',
         'UPGRADE_BAD_TARGET_VERSION',
         'UPGRADE_EXCEPTION',
-        'UPGRADE_OFFLINE_HOST'
+        'UPGRADE_OFFLINE_HOST',
+        'UPGRADE_KEY_ROTATION'
     ]
 
     def __init__(self, mgr: "CephadmOrchestrator"):
@@ -867,11 +871,52 @@ class CephadmUpgrade:
                 break
         return True, to_upgrade
 
+    def _rotate_mgr_mon_auth_keys(self, target_image: str, target_digests: Optional[List[str]] = None) -> None:
+        if self.upgrade_state:
+            if self.upgrade_state.rotated_mgr_mon_auth_key_daemons is None:
+                self.upgrade_state.rotated_mgr_mon_auth_key_daemons = []
+            # do mgr and mon keyrings as one off after mons have been upgraded
+            mon_daemons = self.mgr.cache.get_daemons_by_service('mon')
+            _, mons_needing_upgrade, __, ___ = self._detect_need_upgrade(mon_daemons, target_digests, target_image)
+            if not mons_needing_upgrade:
+                # all mons have been upgraded if we get here
+                for dd in self.mgr.cache.get_daemons_by_service('mgr'):
+                    if dd.name() in self.upgrade_state.rotated_mgr_mon_auth_key_daemons:
+                        continue
+                    daemon_spec = CephadmDaemonDeploySpec.from_daemon_description(dd)
+                    self.mgr.key_rotate(daemon_spec)
+                    if self.mgr.daemon_is_self(daemon_spec.daemon_type, daemon_spec.daemon_id):
+                        self.mgr._schedule_daemon_action(daemon_spec.name(), 'redeploy')
+                    else:
+                        self.mgr._daemon_action(daemon_spec, action='redeploy')
+                    self.upgrade_state.rotated_mgr_mon_auth_key_daemons.append(daemon_spec.name())
+                    self._save_upgrade_state()
+                # mon daemons share a key, only do one key rotation
+                # but still trigger redeploy for each mon
+                if 'mon' not in self.upgrade_state.rotated_mgr_mon_auth_key_daemons:
+                    self.mgr.key_rotate(
+                        CephadmDaemonDeploySpec.from_daemon_description(
+                            mon_daemons[0]
+                        )
+                    )
+                    self.upgrade_state.rotated_mgr_mon_auth_key_daemons.append('mon')
+                    self._save_upgrade_state()
+                for dd in mon_daemons:
+                    if dd.name() in self.upgrade_state.rotated_mgr_mon_auth_key_daemons:
+                        continue
+                    daemon_spec = CephadmDaemonDeploySpec.from_daemon_description(dd)
+                    self.mgr._daemon_action(daemon_spec, action='redeploy')
+                    self.upgrade_state.rotated_mgr_mon_auth_key_daemons.append(daemon_spec.name())
+                    self._save_upgrade_state()
+            else:
+                self.mgr.log.debug('Skipping mgr/mon key rotation, mons not upgraded')
+
     def _upgrade_daemons(self, to_upgrade: List[Tuple[DaemonDescription, bool]], target_image: str, target_digests: Optional[List[str]] = None) -> None:
         assert self.upgrade_state is not None
         num = 1
         if target_digests is None:
             target_digests = []
+        self._rotate_mgr_mon_auth_keys(target_image, target_digests)
         for d_entry in to_upgrade:
             if self.upgrade_state.remaining_count is not None and self.upgrade_state.remaining_count <= 0 and not d_entry[1]:
                 self.mgr.log.info(
@@ -924,9 +969,26 @@ class CephadmUpgrade:
             else:
                 logger.info('Upgrade: Updating %s.%s' %
                             (d.daemon_type, d.daemon_id))
+
+            daemon_spec = CephadmDaemonDeploySpec.from_daemon_description(d)
+
+            try:
+                if daemon_spec.daemon_type in ['mds', 'osd']:
+                    daemon_spec.keyring = None
+                    self.mgr.key_rotate(daemon_spec)
+            except Exception as e:
+                self._fail_upgrade('UPGRADE_KEY_ROTATION', {
+                    'severity': 'warning',
+                    'summary': f'Rotation of cephx key for daemon {d.name()} on host {d.hostname} failed.',
+                    'count': 1,
+                    'detail': [
+                        f'Upgrade daemon key rotation: {d.name()}: {e}'
+                    ],
+                })
+                return
+
             action = 'Upgrading' if not d_entry[1] else 'Redeploying'
             try:
-                daemon_spec = CephadmDaemonDeploySpec.from_daemon_description(d)
                 self.mgr._daemon_action(
                     daemon_spec,
                     'redeploy',