git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
mgr/cephadm: rotate keyring for core ceph daemons during upgrade
author: Adam King <adking@redhat.com>
Wed, 14 May 2025 17:16:43 +0000 (13:16 -0400)
committer: Patrick Donnelly <pdonnell@ibm.com>
Wed, 1 Oct 2025 19:44:21 +0000 (15:44 -0400)
Specifically, this causes us to rotate the mgr, mon, OSD,
and mds keyrings. The mgr and mon keyrings are rotated as soon
as we see that all the mons have been upgraded, and the OSD/mds
keyrings are rotated when we reach those daemons in the upgrade order.

NOTE: This patch alone is not enough to get this working
for encrypted OSDs.

Signed-off-by: Adam King <adking@redhat.com>
src/cephadm/cephadm.py
src/cephadm/cephadmlib/daemons/ceph.py
src/pybind/mgr/cephadm/module.py
src/pybind/mgr/cephadm/services/osd.py
src/pybind/mgr/cephadm/upgrade.py

index 26f13ecda34d1085f870597967d3ed7dbf465758..3ff55244cebea569756c4c4915f18b62ae83228a 100755 (executable)
@@ -180,6 +180,7 @@ from cephadmlib.daemons import (
     Keepalived,
     Monitoring,
     NFSGanesha,
+    OSD,
     SMB,
     SNMPGateway,
     MgmtGateway,
@@ -621,8 +622,28 @@ def create_daemon_dirs(
 
     if keyring:
         keyring_path = os.path.join(data_dir, 'keyring')
+        config_json = fetch_configs(ctx)
+        key_path_exists = False
+        key_path_content = 'N/A'
+        try:
+            key_path_exists = os.path.exists(keyring_path)
+            key_path_content = open(keyring_path, 'r').read()
+        except Exception:
+            pass
+        update_bluestore_label_osd_keyring = False
+        if (
+            ident.daemon_type == 'osd'
+            and os.path.exists(keyring_path)
+            and open(keyring_path, 'r').read() != keyring
+        ):
+            # need to update keyring with ceph-bluestore-tool
+            update_bluestore_label_osd_keyring = True
         with write_new(keyring_path, owner=(uid, gid)) as f:
             f.write(keyring)
+        if update_bluestore_label_osd_keyring:
+            osd_daemon_form = OSD.create(ctx, ident)
+            # osd_daemon_form = OSD.init(ctx, ctx.fsid, ident.daemon_id)
+            osd_daemon_form.rotate_osd_lv_keyring(ctx, keyring_path)
 
     if daemon_type in Monitoring.components.keys():
         config_json = fetch_configs(ctx)
index ac16bbebcfcc9525998e8d8733b389a1ed6ccd72..0730cf82028e723c67b5a453219006fc6bee8c26 100644 (file)
@@ -1,5 +1,6 @@
 import logging
 import os
+import json
 
 from typing import Any, Dict, List, Optional, Tuple, Union
 
@@ -12,6 +13,7 @@ from ..context_getters import (
 )
 from ..daemon_form import register as register_daemon_form
 from ..daemon_identity import DaemonIdentity
+from ..call_wrappers import call, CallVerbosity
 from ..constants import DEFAULT_IMAGE
 from ..context import CephadmContext
 from ..deployment_utils import to_deployment_container
@@ -22,6 +24,7 @@ from ..file_utils import (
     pathify,
     populate_files,
     makedirs,
+    read_file,
     recursive_chown,
 )
 from ..data_utils import dict_get
@@ -332,6 +335,102 @@ class OSD(Ceph):
     def osd_fsid(self) -> Optional[str]:
         return self._osd_fsid
 
+    def rotate_osd_lv_keyring(  # Write the key stored at keyring_path into the 'osd_key' bluestore label of this OSD's block LV.
+        self, ctx: CephadmContext, keyring_path: str
+    ) -> None:  # Raises Error if the keyring is unusable, no block lv_path is found, or a container command fails.
+        keyring_content = read_file([keyring_path])
+        if not keyring_content or keyring_content == 'Unknown':  # presumably read_file yields 'Unknown' when nothing was readable -- TODO confirm
+            raise Error(
+                f'Failed to find OSD keyring content at expected path: {keyring_path}'
+            )
+        actual_keyring = keyring_content  # fallback value, kept if the 'key =' marker is absent (see except below)
+        # if our keyring is the full thing with sections and caps, we don't want that
+        # just the actual keyring itself
+        try:
+            actual_keyring = (
+                keyring_content.split('key =', 1)[1]  # text after the first 'key ='
+                .split('caps', 1)[0]  # ...up to the first 'caps', if any
+                .strip()
+            )
+        except IndexError:  # no 'key =' in the content: only logged, the whole file content is then used as the key
+            logger.error(
+                f'Failed to parse keyring from {keyring_content} for rotation of key for osd.{self.identity.daemon_id}'
+            )
+        c_v_container = CephContainer(  # ceph-volume lvm list <osd id> --format json, in a privileged container
+            ctx,
+            image=ctx.image,
+            privileged=True,
+            entrypoint='ceph-volume',
+            args=[
+                'lvm',
+                'list',
+                str(self.identity.daemon_id),
+                '--format',
+                'json',
+            ],
+            volume_mounts=get_ceph_mounts_for_type(
+                ctx, ctx.fsid, 'ceph-volume'
+            ),
+        )
+        out, err, ret = call(
+            ctx,
+            c_v_container.run_cmd(),
+            verbosity=CallVerbosity.QUIET_UNLESS_ERROR,
+        )
+        if ret:  # truthy return code from the ceph-volume call is treated as failure
+            raise Error(
+                f'Got error using ceph-volume lvm list to get lv path for osd.{self.identity.daemon_id}\n'
+                f'Out:{out}\n'
+                f'Err:{err}'
+            )
+        osd_bluestore_data = json.loads(out)  # NOTE(review): a JSON decode failure is not caught here
+        lv_path = ''
+        osd_lv_data = osd_bluestore_data.get(self.identity.daemon_id, [])  # entries are looked up under the OSD id key
+        if osd_lv_data:
+            for dev in osd_lv_data:  # no break: if several entries have type 'block', the last one's lv_path wins
+                if dev.get('type') == 'block':
+                    lv_path = dev.get('lv_path', '')
+        if not lv_path:  # covers both "no entry for this OSD" and "no block device with an lv_path"
+            raise Error(
+                f'Failed to find lv path for osd with id "{self.identity.daemon_id}". Key not rotated using ceph-bluestore-tool'
+            )
+        bluestore_tool_container = CephContainer(  # ceph-bluestore-tool --dev <lv_path> set-label-key -k osd_key -v <key>
+            ctx,
+            image=ctx.image,
+            privileged=True,
+            entrypoint='ceph-bluestore-tool',
+            args=[
+                '--dev',
+                lv_path,
+                'set-label-key',
+                '-k',
+                'osd_key',
+                '-v',
+                actual_keyring,
+            ],
+            volume_mounts={'/dev': '/dev'},
+        )
+        # OSD must be stopped to make changes to bluestore labels
+        logger.info(
+            f'Stopping osd.{self.identity.daemon_id} to update osd_key bluestore label'
+        )
+        call(  # NOTE(review): the result of the stop command is discarded, so a failed stop goes unnoticed
+            ctx,
+            ['systemctl', 'stop', self.identity.unit_name],
+            verbosity=CallVerbosity.QUIET_UNLESS_ERROR,
+        )
+        out, err, ret = call(  # run the label update after the stop request
+            ctx,
+            bluestore_tool_container.run_cmd(),
+            verbosity=CallVerbosity.QUIET_UNLESS_ERROR,
+        )
+        if ret:  # truthy return code from ceph-bluestore-tool is treated as failure
+            raise Error(
+                'Got error rotating osd keyring using ceph-bluestore-tool\n'
+                f'Out:{out}\n'
+                f'Err:{err}'
+            )  # NOTE(review): the unit is not started again in this method -- presumably the caller's deploy flow does that; verify
+
 
 @register_daemon_form
 class CephExporter(ContainerDaemonForm):
index be17f4a4223a8c5994dcc0aeea24dc5874a59b9f..04656c9449d7bed18aed5d4e642db783e73d5bcb 100644 (file)
@@ -2616,6 +2616,18 @@ Then run the following:
 
         return self.perform_service_action(action, service_name)
 
+    def key_rotate(self, daemon_spec: CephadmDaemonDeploySpec) -> None:  # Issue an 'auth rotate' mon command for the daemon's entity; returns nothing.
+        rc, out, err = self.mon_command({
+            'prefix': 'auth rotate',
+            'entity': daemon_spec.entity_name(),  # entity name comes from the spec itself
+            'format': 'json',
+        })  # 'out' is only used in the error message below; it is not returned to the caller
+        if rc:  # truthy return code is treated as failure
+            raise OrchestratorError(
+                f'Failed to rotate daemon key for {daemon_spec.entity_name()}.\n'
+                f'Rc: {rc}\nOut: {out}\nErr: {err}'
+            )
+
     def _rotate_daemon_key(self, daemon_spec: CephadmDaemonDeploySpec) -> str:
         self.log.info(f'Rotating authentication key for {daemon_spec.name()}')
         rc, out, err = self.mon_command({
index 60a399149f9f9377ef77b34409f50ef656bdb9a8..31039b9b25bfba5e55acd4f2e43b76be914363ac 100644 (file)
@@ -40,10 +40,6 @@ class OSDService(CephService):
 
         async def create_from_spec_one(host: str, drive_selection: DriveSelection) -> Optional[str]:
             # skip this host if there has been no change in inventory
-            if not self.mgr.cache.osdspec_needs_apply(host, drive_group):
-                self.mgr.log.debug("skipping apply of %s on %s (no change)" % (
-                    host, drive_group))
-                return None
             # skip this host if we cannot schedule here
             if self.mgr.inventory.has_label(host, SpecialHostLabels.DRAIN_DAEMONS):
                 return None
index 26396f7f93d7e917c9b61355037871c2d76bf4ef..c0e7c63cd786781453b3869e673e004db54a102a 100644 (file)
@@ -71,6 +71,7 @@ class UpgradeState:
                  services: Optional[List[str]] = None,
                  total_count: Optional[int] = None,
                  remaining_count: Optional[int] = None,
+                 rotated_mgr_mon_auth_key_daemons: Optional[List[str]] = None,
                  ):
         self._target_name: str = target_name  # Use CephadmUpgrade.target_image instead.
         self.progress_id: str = progress_id
@@ -88,6 +89,7 @@ class UpgradeState:
         self.services = services
         self.total_count = total_count
         self.remaining_count = remaining_count
+        self.rotated_mgr_mon_auth_key_daemons = rotated_mgr_mon_auth_key_daemons
 
     def to_json(self) -> dict:
         return {
@@ -106,6 +108,7 @@ class UpgradeState:
             'services': self.services,
             'total_count': self.total_count,
             'remaining_count': self.remaining_count,
+            'rotated_mgr_mon_auth_key_daemons': self.rotated_mgr_mon_auth_key_daemons,
         }
 
     @classmethod
@@ -127,7 +130,8 @@ class CephadmUpgrade:
         'UPGRADE_REDEPLOY_DAEMON',
         'UPGRADE_BAD_TARGET_VERSION',
         'UPGRADE_EXCEPTION',
-        'UPGRADE_OFFLINE_HOST'
+        'UPGRADE_OFFLINE_HOST',
+        'UPGRADE_KEY_ROTATION'
     ]
 
     def __init__(self, mgr: "CephadmOrchestrator"):
@@ -867,11 +871,52 @@ class CephadmUpgrade:
                 break
         return True, to_upgrade
 
+    def _rotate_mgr_mon_auth_keys(self, target_image: str, target_digests: Optional[List[str]] = None) -> None:  # Rotate mgr and mon keys and redeploy those daemons, once no mon is reported as needing upgrade.
+        if self.upgrade_state:  # does nothing when there is no upgrade state
+            if self.upgrade_state.rotated_mgr_mon_auth_key_daemons is None:
+                self.upgrade_state.rotated_mgr_mon_auth_key_daemons = []  # lazily create the list of already-handled names
+            # do mgr and mon keyrings as one off after mons have been upgraded
+            mon_daemons = self.mgr.cache.get_daemons_by_service('mon')
+            _, mons_needing_upgrade, __, ___ = self._detect_need_upgrade(mon_daemons, target_digests, target_image)  # only the second element of the 4-tuple is used
+            if not mons_needing_upgrade:
+                # all mons have been upgraded if we get here
+                for dd in self.mgr.cache.get_daemons_by_service('mgr'):
+                    if dd.name() in self.upgrade_state.rotated_mgr_mon_auth_key_daemons:  # already handled on an earlier pass
+                        continue
+                    daemon_spec = CephadmDaemonDeploySpec.from_daemon_description(dd)
+                    self.mgr.key_rotate(daemon_spec)  # each mgr gets its own key rotation
+                    if self.mgr.daemon_is_self(daemon_spec.daemon_type, daemon_spec.daemon_id):  # this mgr: redeploy is scheduled rather than run inline
+                        self.mgr._schedule_daemon_action(daemon_spec.name(), 'redeploy')
+                    else:
+                        self.mgr._daemon_action(daemon_spec, action='redeploy')
+                    self.upgrade_state.rotated_mgr_mon_auth_key_daemons.append(daemon_spec.name())
+                    self._save_upgrade_state()  # saved after every daemon, together with the updated list
+                # mon daemons share a key, only do one key rotation
+                # but still trigger redeploy for each mon
+                if 'mon' not in self.upgrade_state.rotated_mgr_mon_auth_key_daemons:  # the bare string 'mon' marks "shared mon key rotated"
+                    self.mgr.key_rotate(
+                        CephadmDaemonDeploySpec.from_daemon_description(
+                            mon_daemons[0]  # NOTE(review): no guard for an empty mon_daemons list -- confirm it cannot be empty here
+                        )
+                    )
+                    self.upgrade_state.rotated_mgr_mon_auth_key_daemons.append('mon')
+                    self._save_upgrade_state()
+                for dd in mon_daemons:
+                    if dd.name() in self.upgrade_state.rotated_mgr_mon_auth_key_daemons:  # this mon was already redeployed
+                        continue
+                    daemon_spec = CephadmDaemonDeploySpec.from_daemon_description(dd)
+                    self.mgr._daemon_action(daemon_spec, action='redeploy')  # redeploy only; the shared key was rotated once above
+                    self.upgrade_state.rotated_mgr_mon_auth_key_daemons.append(daemon_spec.name())
+                    self._save_upgrade_state()
+            else:
+                self.mgr.log.debug('Skipping mgr/mon key rotation, mons not upgraded')
+
     def _upgrade_daemons(self, to_upgrade: List[Tuple[DaemonDescription, bool]], target_image: str, target_digests: Optional[List[str]] = None) -> None:
         assert self.upgrade_state is not None
         num = 1
         if target_digests is None:
             target_digests = []
+        self._rotate_mgr_mon_auth_keys(target_image, target_digests)
         for d_entry in to_upgrade:
             if self.upgrade_state.remaining_count is not None and self.upgrade_state.remaining_count <= 0 and not d_entry[1]:
                 self.mgr.log.info(
@@ -924,9 +969,26 @@ class CephadmUpgrade:
             else:
                 logger.info('Upgrade: Updating %s.%s' %
                             (d.daemon_type, d.daemon_id))
+
+            daemon_spec = CephadmDaemonDeploySpec.from_daemon_description(d)
+
+            try:
+                if daemon_spec.daemon_type in ['mds', 'osd']:
+                    daemon_spec.keyring = None
+                    self.mgr.key_rotate(daemon_spec)
+            except Exception as e:
+                self._fail_upgrade('UPGRADE_KEY_ROTATION', {
+                    'severity': 'warning',
+                    'summary': f'Rotation of cephx key for daemon {d.name()} on host {d.hostname} failed.',
+                    'count': 1,
+                    'detail': [
+                        f'Upgrade daemon key rotation: {d.name()}: {e}'
+                    ],
+                })
+                return
+
             action = 'Upgrading' if not d_entry[1] else 'Redeploying'
             try:
-                daemon_spec = CephadmDaemonDeploySpec.from_daemon_description(d)
                 self.mgr._daemon_action(
                     daemon_spec,
                     'redeploy',