From: Ujjawal Anand Date: Sun, 15 Feb 2026 19:41:41 +0000 (+0530) Subject: cephadm: Handling OSD flags during upgrade X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=63416f28dfe7b774caf76dbcc819853e0e3d7d65;p=ceph-ci.git cephadm: Handling OSD flags during upgrade Fixes: https://tracker.ceph.com/issues/74956 Signed-off-by: Ujjawal Anand --- diff --git a/doc/cephadm/upgrade.rst b/doc/cephadm/upgrade.rst index 9c3eba8fbea..8736e8289e0 100644 --- a/doc/cephadm/upgrade.rst +++ b/doc/cephadm/upgrade.rst @@ -243,6 +243,54 @@ you need. For example, the following command upgrades to a development build: For more information about available container images, see :ref:`containers`. +Setting OSD flags during upgrade +================================ + +Cephadm can set specified OSD flags at upgrade start time and then unset these flags upon +upgrade completion. By default, cephadm will set and unset the ``noout,noscrub,nodeep-scrub`` OSD flags. + +To see the OSD flags cephadm is currently configured to set, run: + +.. prompt:: bash # + + ceph config get mgr mgr/cephadm/upgrade_osd_flags + +The config option is a comma-separated list of the flags to be set, and can be modified +by running: + +.. prompt:: bash # + + ceph config set mgr mgr/cephadm/upgrade_osd_flags <flag1>,<flag2>, . . . ,<flagN> + +Note that setting the config option overwrites the set of flags cephadm will +set. For example, if it is currently configured to set ``flag1`` and ``flag2`` and you run a config set +to have it set ``flag3`` and ``flag4``, it will ONLY be configured to set ``flag3`` and ``flag4``, NOT +``flag1``, ``flag2``, ``flag3``, and ``flag4``. + +Cephadm is configured to set these flags by default on upgrade in versions that +support it. To have cephadm skip setting these flags for a specific upgrade without +changing the persistent configuration, you can pass ``--no-osd-flags`` to the upgrade +command: + +.. prompt:: bash # + + ceph orch upgrade start --image <image-name> --no-osd-flags + +.. 
note:: + + To check if the current version of cephadm supports setting the OSD flags, run + ``ceph orch upgrade start --help`` and check whether ``--no-osd-flags`` is listed + as a command argument. If it is, the feature is supported and cephadm will set these flags by + default during the upgrade. + +.. note:: + + When OSD flag management is enabled (the default), cephadm sets the flags as + part of the ``ceph orch upgrade start`` command (and unsets them when the upgrade completes or is stopped). This can make the command take a + little longer to return than it used to. + +Cephadm will only unset OSD flags that it set itself for the upgrade, and will +not remove flags that were already set before the upgrade started. .. _cephadm_staggered_upgrade: diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py index c815bbb1287..f26765957fd 100644 --- a/src/pybind/mgr/cephadm/module.py +++ b/src/pybind/mgr/cephadm/module.py @@ -411,6 +411,12 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule): default=16, desc='Maximum number of OSD daemons upgraded in parallel.' 
), + Option( + 'upgrade_osd_flags', + type='str', + default='noout,noscrub,nodeep-scrub', + desc='Comma separated list of OSD flags to set for the duration of an upgrade' + ), Option( 'service_discovery_port', type='int', @@ -597,6 +603,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule): self.apply_spec_fails: List[Tuple[str, str]] = [] self.max_osd_draining_count = 10 self.max_parallel_osd_upgrades = 16 + self.upgrade_osd_flags = 'noout,noscrub,nodeep-scrub' self.device_enhanced_scan = False self.inventory_list_all = False self.cgroups_split = True @@ -4080,7 +4087,7 @@ Then run the following: @handle_orch_error def upgrade_start(self, image: str, version: str, daemon_types: Optional[List[str]] = None, host_placement: Optional[str] = None, - services: Optional[List[str]] = None, limit: Optional[int] = None) -> str: + services: Optional[List[str]] = None, limit: Optional[int] = None, no_osd_flags: bool = False) -> str: if self.inventory.get_host_with_state("maintenance"): raise OrchestratorError("Upgrade aborted - you have host(s) in maintenance state") if self.offline_hosts: @@ -4112,7 +4119,7 @@ Then run the following: raise OrchestratorError( f'Upgrade aborted - --limit arg must be a positive integer, not {limit}') - return self.upgrade.upgrade_start(image, version, daemon_types, hosts, services, limit) + return self.upgrade.upgrade_start(image, version, daemon_types, hosts, services, limit, no_osd_flags) @handle_orch_error def upgrade_pause(self) -> str: diff --git a/src/pybind/mgr/cephadm/upgrade.py b/src/pybind/mgr/cephadm/upgrade.py index 26396f7f93d..b06773285d2 100644 --- a/src/pybind/mgr/cephadm/upgrade.py +++ b/src/pybind/mgr/cephadm/upgrade.py @@ -71,6 +71,7 @@ class UpgradeState: services: Optional[List[str]] = None, total_count: Optional[int] = None, remaining_count: Optional[int] = None, + osd_flags: Optional[List[str]] = None, ): self._target_name: str = target_name # Use CephadmUpgrade.target_image instead. 
self.progress_id: str = progress_id @@ -88,6 +89,8 @@ class UpgradeState: self.services = services self.total_count = total_count self.remaining_count = remaining_count + # Global OSD flags actually set by cephadm for the upgrade and then unset afterwards. + self.osd_flags: Optional[List[str]] = osd_flags def to_json(self) -> dict: return { @@ -106,6 +109,7 @@ class UpgradeState: 'services': self.services, 'total_count': self.total_count, 'remaining_count': self.remaining_count, + 'osd_flags': self.osd_flags, } @classmethod @@ -310,8 +314,135 @@ class CephadmUpgrade: r["tags"] = sorted(ls) return r + def _get_osd_flags_for_upgrade(self) -> List[str]: + """ + Get configured OSD flags to set during upgrade from + mgr/cephadm/upgrade_osd_flags(comma-separated list). + + Empty string means "do not manage any flags". + """ + # This is a cephadm module option, so read it from the mgr attribute. + s = str(self.mgr.upgrade_osd_flags or '').strip() + if not s: + return [] + + flags = [f.strip() for f in s.split(',') if f.strip()] + # sorting keeps the output readable. + return sorted(set(flags)) + + def _rollback_osd_flags(self, flags: List[str]) -> List[str]: + rollback_failed: List[str] = [] + for flag in flags: + try: + self.mgr.check_mon_command({ + 'prefix': 'osd unset', + 'key': flag, + }) + except MonCommandFailed as e: + rollback_failed.append(flag) + self.mgr.log.error(f'Upgrade: failed to rollback OSD flag {flag}: {e}') + return rollback_failed + + def _set_osd_flags_for_upgrade(self) -> None: + """ + Set configured OSD flags for the duration of the upgrade. + Track which flags are actually set by cephadm so we only unset those later. + """ + assert self.upgrade_state is not None + + desired_flags = self._get_osd_flags_for_upgrade() + if not desired_flags: + # Explicitly configured to manage no flags. 
+ self.upgrade_state.osd_flags = [] + self._save_upgrade_state() + return + + osd_map = self.mgr.get("osd_map") + flags_str = osd_map.get('flags', '') or '' + current_flags = set(f for f in flags_str.split(',') if f) + + self.upgrade_state.osd_flags = [] + self._save_upgrade_state() + for flag in desired_flags: + if flag in current_flags: + # One-off at upgrade start: be explicit when we skip a flag. + self.mgr.log.info(f'Upgrade: OSD flag {flag} already set; leaving as-is') + continue + self.mgr.log.info(f'Upgrade: Setting OSD flag {flag} for upgrade duration') + try: + # Set the flag. If it fails, abort upgrade start. + self.mgr.check_mon_command({ + 'prefix': 'osd set', + 'key': flag, + }) + except MonCommandFailed as e: + self.mgr.log.error(f'Upgrade: failed to set OSD flag {flag}: {e}') + + # Unset anything we already set in this start attempt. + rollback_failed = self._rollback_osd_flags(self.upgrade_state.osd_flags or []) + + # Keep track of any flags we could not roll back. + self.upgrade_state.osd_flags = rollback_failed + self._save_upgrade_state() + + raise OrchestratorError(f'Failed to set OSD flag {flag}: {e}') + + # Only record the flag if it was set successfully. + self.upgrade_state.osd_flags.append(flag) + self._save_upgrade_state() + + def _restore_osd_flags_after_upgrade(self) -> None: + """ + Restore OSD flags to their pre-upgrade state. + + Unset only the flags that were set by cephadm for this upgrade, and + leave user-set flags untouched. + """ + if not self.upgrade_state: + # No upgrade in progress, nothing to restore. + return + + osd_map = self.mgr.get("osd_map") + flags_str = osd_map.get('flags', '') or '' + current_flags = set(f for f in flags_str.split(',') if f) + + # Only unset the flags cephadm actually set. If any unsets fail, keep track of + # them so we don't lose state and leave flags behind silently. 
+ remaining_flags: List[str] = [] + for flag in (self.upgrade_state.osd_flags or []): + if flag not in current_flags: + self.mgr.log.info(f'Upgrade: OSD flag {flag} already unset; skipping') + continue + + self.mgr.log.info(f'Upgrade: Unsetting OSD flag {flag} after upgrade') + try: + # Just log any failures here and carry on trying to unset the rest. + self.mgr.check_mon_command({ + 'prefix': 'osd unset', + 'key': flag, + }) + except MonCommandFailed as e: + remaining_flags.append(flag) + self.mgr.log.error(f'Upgrade: failed to unset OSD flag {flag}: {e}') + continue + + if remaining_flags: + # At least one `osd unset` operation failed. + # Record the failed entries and unset them manually. + self.upgrade_state.osd_flags = remaining_flags + self._save_upgrade_state() + raise OrchestratorError( + 'Failed to restore OSD flags after upgrade. ' + f'Please manually unset: {",".join(remaining_flags)}' + ) + + # Clear stored state now that flags have been restored. + self.upgrade_state.osd_flags = [] + self._save_upgrade_state() + def upgrade_start(self, image: str, version: str, daemon_types: Optional[List[str]] = None, - hosts: Optional[List[str]] = None, services: Optional[List[str]] = None, limit: Optional[int] = None) -> str: + hosts: Optional[List[str]] = None, services: Optional[List[str]] = None, + limit: Optional[int] = None, no_osd_flags: bool = False) -> str: fail_fs_value = cast(bool, self.mgr.get_module_option_ex( 'orchestrator', 'fail_fs', False)) if self.mgr.mode != 'root': @@ -358,6 +489,20 @@ class CephadmUpgrade: total_count=limit, remaining_count=limit, ) + # Set OSD flags for the duration of the upgrade (unless --no-osd-flags was requested). + # If this fails, abort and clear upgrade_state so we don't leave a half-started upgrade around. + try: + if no_osd_flags: + # User passed --no-osd-flags, so we don't manage any flags for this upgrade. 
+ self.upgrade_state.osd_flags = [] + self._save_upgrade_state() + else: + self._set_osd_flags_for_upgrade() + except OrchestratorError as e: + self.mgr.log.error(f'Upgrade: failed to set OSD flags, aborting upgrade start: {e}') + self.upgrade_state = None + self._save_upgrade_state() + raise self._update_upgrade_progress(0.0) self._save_upgrade_state() self._clear_upgrade_health_checks() @@ -487,6 +632,8 @@ class CephadmUpgrade: if self.upgrade_state.progress_id: self.mgr.remote('progress', 'complete', self.upgrade_state.progress_id) + # Restore any OSD flags we temporarily set for this upgrade. + self._restore_osd_flags_after_upgrade() target_image = self.target_image self.mgr.log.info('Upgrade: Stopped') self.upgrade_state = None @@ -1086,6 +1233,8 @@ class CephadmUpgrade: if not self.upgrade_state: logger.debug('_mark_upgrade_complete upgrade already marked complete, exiting') return + # Restore OSD flags before we clear the upgrade state. + self._restore_osd_flags_after_upgrade() logger.info('Upgrade: Complete!') if self.upgrade_state.progress_id: self.mgr.remote('progress', 'complete', diff --git a/src/pybind/mgr/orchestrator/_interface.py b/src/pybind/mgr/orchestrator/_interface.py index 136fde595ac..5c9ea15cc74 100644 --- a/src/pybind/mgr/orchestrator/_interface.py +++ b/src/pybind/mgr/orchestrator/_interface.py @@ -949,7 +949,7 @@ class Orchestrator(object): raise NotImplementedError() def upgrade_start(self, image: Optional[str], version: Optional[str], daemon_types: Optional[List[str]], - hosts: Optional[str], services: Optional[List[str]], limit: Optional[int]) -> OrchResult[str]: + hosts: Optional[str], services: Optional[List[str]], limit: Optional[int], no_osd_flags: bool = False) -> OrchResult[str]: raise NotImplementedError() def upgrade_pause(self) -> OrchResult[str]: diff --git a/src/pybind/mgr/orchestrator/module.py b/src/pybind/mgr/orchestrator/module.py index 5fc2fce63fa..a471fc39505 100644 --- a/src/pybind/mgr/orchestrator/module.py +++ 
b/src/pybind/mgr/orchestrator/module.py @@ -2569,12 +2569,13 @@ Usage: hosts: Optional[str] = None, services: Optional[str] = None, limit: Optional[int] = None, + no_osd_flags: bool = False, ceph_version: Optional[str] = None) -> HandleCommandResult: """Initiate upgrade""" self._upgrade_check_image_name(image, ceph_version) dtypes = daemon_types.split(',') if daemon_types is not None else None service_names = services.split(',') if services is not None else None - completion = self.upgrade_start(image, ceph_version, dtypes, hosts, service_names, limit) + completion = self.upgrade_start(image, ceph_version, dtypes, hosts, service_names, limit, no_osd_flags) raise_if_exception(completion) return HandleCommandResult(stdout=completion.result_str())