cephadm: Handling OSD flags during upgrade

author Ujjawal Anand <ujjawal.anand@ibm.com>

Sun, 15 Feb 2026 19:41:41 +0000 (01:11 +0530)

committer Ujjawal Anand <ujjawal.anand@ibm.com>

Thu, 19 Feb 2026 12:15:01 +0000 (17:45 +0530)
author Ujjawal Anand <ujjawal.anand@ibm.com>
Sun, 15 Feb 2026 19:41:41 +0000 (01:11 +0530)
committer Ujjawal Anand <ujjawal.anand@ibm.com>
Thu, 19 Feb 2026 12:15:01 +0000 (17:45 +0530)
diff --git a/doc/cephadm/upgrade.rst b/doc/cephadm/upgrade.rst

index 9c3eba8fbea461574170eb1980f426d95b07ad40..8736e8289e056b4228f33e517e91b1a19c21bdcd 100644 (file)
--- a/doc/cephadm/upgrade.rst
+++ b/doc/cephadm/upgrade.rst
@@ -243,6 +243,54 @@ you need. For example, the following command upgrades to a development build:
  
  For more information about available container images, see :ref:`containers`.
  
+Setting OSD flags during upgrade
+================================
+
+Cephadm can set specified OSD flags when an upgrade starts and then unset those flags when
+the upgrade completes or is stopped. By default, cephadm manages the ``noout``, ``noscrub``, and ``nodeep-scrub`` OSD flags.
+
+To see the OSD flags cephadm is currently configured to set, run:
+
+.. prompt:: bash #
+
+  ceph config get mgr mgr/cephadm/upgrade_osd_flags
+
+The config option is a comma-separated list of the flags to be set. It can be modified
+by running:
+
+.. prompt:: bash #
+
+  ceph config set mgr mgr/cephadm/upgrade_osd_flags <flag1>,<flag2>,...,<flagN>
+
+Note that setting the config option replaces the entire set of flags cephadm will
+set; it does not add to it. For example, if cephadm is currently configured to set ``flag1``
+and ``flag2`` and you set the option to ``flag3,flag4``, cephadm will set ONLY ``flag3`` and
+``flag4``, NOT ``flag1``, ``flag2``, ``flag3``, and ``flag4``.
+
+In versions that support this feature, cephadm sets these flags by default on
+upgrade. To have cephadm skip setting these flags for a specific upgrade without
+changing the persistent configuration, pass ``--no-osd-flags`` to the upgrade
+command:
+
+.. prompt:: bash #
+
+  ceph orch upgrade start --image <image> --no-osd-flags
+
+.. note::
+
+   To check whether the current version of cephadm supports setting OSD flags, run
+   ``ceph orch upgrade start --help`` and see whether ``--no-osd-flags`` is listed
+   as a command argument. If so, it is supported and cephadm will set these flags by
+   default during the upgrade.
+
+.. note::
+
+   When OSD flag management is enabled (the default), cephadm sets the flags
+   synchronously as part of the ``ceph orch upgrade start`` command, so the command
+   can take a little longer to return than it used to.
+
+Cephadm will only unset OSD flags that it set itself for the upgrade, and will
+not remove flags that were already set before the upgrade started.
  
  .. _cephadm_staggered_upgrade:
  
diff --git a/src/pybind/mgr/cephadm/module.py b/src/pybind/mgr/cephadm/module.py

index c815bbb128798dbb35511bbbc6a44448c91c22b2..f26765957fd44bec16c8dd11a0856548bc6e4611 100644 (file)
--- a/src/pybind/mgr/cephadm/module.py
+++ b/src/pybind/mgr/cephadm/module.py
@@ -411,6 +411,12 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
              default=16,
              desc='Maximum number of OSD daemons upgraded in parallel.'
          ),
+        Option(
+            'upgrade_osd_flags',
+            type='str',
+            default='noout,noscrub,nodeep-scrub',
+            desc='Comma separated list of OSD flags to set for the duration of an upgrade'
+        ),
          Option(
              'service_discovery_port',
              type='int',
@@ -597,6 +603,7 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
              self.apply_spec_fails: List[Tuple[str, str]] = []
              self.max_osd_draining_count = 10
              self.max_parallel_osd_upgrades = 16
+            self.upgrade_osd_flags = 'noout,noscrub,nodeep-scrub'
              self.device_enhanced_scan = False
              self.inventory_list_all = False
              self.cgroups_split = True
@@ -4080,7 +4087,7 @@ Then run the following:
  
      @handle_orch_error
      def upgrade_start(self, image: str, version: str, daemon_types: Optional[List[str]] = None, host_placement: Optional[str] = None,
-                      services: Optional[List[str]] = None, limit: Optional[int] = None) -> str:
+                      services: Optional[List[str]] = None, limit: Optional[int] = None, no_osd_flags: bool = False) -> str:
          if self.inventory.get_host_with_state("maintenance"):
              raise OrchestratorError("Upgrade aborted - you have host(s) in maintenance state")
          if self.offline_hosts:
@@ -4112,7 +4119,7 @@ Then run the following:
                  raise OrchestratorError(
                      f'Upgrade aborted - --limit arg must be a positive integer, not {limit}')
  
-        return self.upgrade.upgrade_start(image, version, daemon_types, hosts, services, limit)
+        return self.upgrade.upgrade_start(image, version, daemon_types, hosts, services, limit, no_osd_flags)
  
      @handle_orch_error
      def upgrade_pause(self) -> str:
diff --git a/src/pybind/mgr/cephadm/upgrade.py b/src/pybind/mgr/cephadm/upgrade.py

index 26396f7f93d7e917c9b61355037871c2d76bf4ef..b06773285d260b39c6c7c1c0f20b519cb5165f87 100644 (file)
--- a/src/pybind/mgr/cephadm/upgrade.py
+++ b/src/pybind/mgr/cephadm/upgrade.py
@@ -71,6 +71,7 @@ class UpgradeState:
                   services: Optional[List[str]] = None,
                   total_count: Optional[int] = None,
                   remaining_count: Optional[int] = None,
+                 osd_flags: Optional[List[str]] = None,
                   ):
          self._target_name: str = target_name  # Use CephadmUpgrade.target_image instead.
          self.progress_id: str = progress_id
@@ -88,6 +89,8 @@ class UpgradeState:
          self.services = services
          self.total_count = total_count
          self.remaining_count = remaining_count
+        # Global OSD flags actually set by cephadm for the upgrade and then unset afterwards.
+        self.osd_flags: Optional[List[str]] = osd_flags
  
      def to_json(self) -> dict:
          return {
@@ -106,6 +109,7 @@ class UpgradeState:
              'services': self.services,
              'total_count': self.total_count,
              'remaining_count': self.remaining_count,
+            'osd_flags': self.osd_flags,
          }
  
      @classmethod
@@ -310,8 +314,135 @@ class CephadmUpgrade:
              r["tags"] = sorted(ls)
          return r
  
+    def _get_osd_flags_for_upgrade(self) -> List[str]:
+        """
+        Return the OSD flags configured in mgr/cephadm/upgrade_osd_flags
+        (a comma-separated list) as a sorted, de-duplicated list.
+
+        An empty or unset option yields [], meaning "do not manage any flags".
+        """
+        # This is a cephadm module option, so read it from the mgr attribute.
+        s = str(self.mgr.upgrade_osd_flags or '').strip()
+        if not s:
+            return []
+
+        flags = [f.strip() for f in s.split(',') if f.strip()]  # drop blank entries, e.g. "a,,b"
+        # set() removes duplicates; sorting keeps the output stable and readable.
+        return sorted(set(flags))
+
+    def _rollback_osd_flags(self, flags: List[str]) -> List[str]:
+        rollback_failed: List[str] = []  # flags whose `osd unset` failed; this is the return value
+        for flag in flags:  # try to unset every flag, even after an earlier one failed
+            try:
+                self.mgr.check_mon_command({
+                    'prefix': 'osd unset',
+                    'key': flag,
+                })
+            except MonCommandFailed as e:
+                rollback_failed.append(flag)  # best effort: remember it and carry on
+                self.mgr.log.error(f'Upgrade: failed to rollback OSD flag {flag}: {e}')
+        return rollback_failed
+
+    def _set_osd_flags_for_upgrade(self) -> None:
+        """
+        Set the configured OSD flags for the duration of the upgrade. Only flags this call sets
+        are recorded in upgrade_state.osd_flags (to be unset later). Raises OrchestratorError on failure.
+        """
+        assert self.upgrade_state is not None
+
+        desired_flags = self._get_osd_flags_for_upgrade()
+        if not desired_flags:
+            # Explicitly configured to manage no flags.
+            self.upgrade_state.osd_flags = []
+            self._save_upgrade_state()
+            return
+
+        osd_map = self.mgr.get("osd_map")
+        flags_str = osd_map.get('flags', '') or ''
+        current_flags = set(f for f in flags_str.split(',') if f)
+        # Persist an empty list first; each flag is appended (and saved) only after it is set.
+        self.upgrade_state.osd_flags = []
+        self._save_upgrade_state()
+        for flag in desired_flags:
+            if flag in current_flags:
+                # Already set before the upgrade: log it and do not record it, so it is not unset later.
+                self.mgr.log.info(f'Upgrade: OSD flag {flag} already set; leaving as-is')
+                continue
+            self.mgr.log.info(f'Upgrade: Setting OSD flag {flag} for upgrade duration')
+            try:
+                # Set the flag. If it fails, abort upgrade start.
+                self.mgr.check_mon_command({
+                    'prefix': 'osd set',
+                    'key': flag,
+                })
+            except MonCommandFailed as e:
+                self.mgr.log.error(f'Upgrade: failed to set OSD flag {flag}: {e}')
+
+                # Unset anything we already set in this start attempt.
+                rollback_failed = self._rollback_osd_flags(self.upgrade_state.osd_flags or [])
+                # Keep track of any flags we could not roll back.
+                # NOTE(review): upgrade_start() sets upgrade_state = None when this raises, so this record is dropped - confirm intended.
+                self.upgrade_state.osd_flags = rollback_failed
+                self._save_upgrade_state()
+
+                raise OrchestratorError(f'Failed to set OSD flag {flag}: {e}')
+
+            # Only record the flag if it was set successfully.
+            self.upgrade_state.osd_flags.append(flag)
+            self._save_upgrade_state()
+
+    def _restore_osd_flags_after_upgrade(self) -> None:
+        """
+        Restore OSD flags to their pre-upgrade state.
+
+        Unset only the flags recorded in upgrade_state.osd_flags (those cephadm set for this
+        upgrade); other flags are left untouched. Raises OrchestratorError if any unset fails.
+        """
+        if not self.upgrade_state:
+            # No upgrade in progress, nothing to restore.
+            return
+
+        osd_map = self.mgr.get("osd_map")
+        flags_str = osd_map.get('flags', '') or ''
+        current_flags = set(f for f in flags_str.split(',') if f)
+
+        # Only unset the flags cephadm actually set. If any unsets fail, keep track of
+        # them so we don't lose state and leave flags behind silently.
+        remaining_flags: List[str] = []
+        for flag in (self.upgrade_state.osd_flags or []):
+            if flag not in current_flags:
+                self.mgr.log.info(f'Upgrade: OSD flag {flag} already unset; skipping')
+                continue
+
+            self.mgr.log.info(f'Upgrade: Unsetting OSD flag {flag} after upgrade')
+            try:
+                # A failure here is logged and recorded below; the loop still tries the rest.
+                self.mgr.check_mon_command({
+                    'prefix': 'osd unset',
+                    'key': flag,
+                })
+            except MonCommandFailed as e:
+                remaining_flags.append(flag)
+                self.mgr.log.error(f'Upgrade: failed to unset OSD flag {flag}: {e}')
+                continue
+        # NOTE(review): the raise below reaches callers that invoke this before clearing upgrade_state - confirm they handle it.
+        if remaining_flags:
+            # At least one `osd unset` failed: persist the flags that are still set so they
+            # are not forgotten, then ask the operator to unset them manually.
+            self.upgrade_state.osd_flags = remaining_flags
+            self._save_upgrade_state()
+            raise OrchestratorError(
+                'Failed to restore OSD flags after upgrade. '
+                f'Please manually unset: {",".join(remaining_flags)}'
+            )
+
+        # Clear stored state now that flags have been restored.
+        self.upgrade_state.osd_flags = []
+        self._save_upgrade_state()
+
      def upgrade_start(self, image: str, version: str, daemon_types: Optional[List[str]] = None,
-                      hosts: Optional[List[str]] = None, services: Optional[List[str]] = None, limit: Optional[int] = None) -> str:
+                      hosts: Optional[List[str]] = None, services: Optional[List[str]] = None,
+                      limit: Optional[int] = None, no_osd_flags: bool = False) -> str:
          fail_fs_value = cast(bool, self.mgr.get_module_option_ex(
              'orchestrator', 'fail_fs', False))
          if self.mgr.mode != 'root':
@@ -358,6 +489,20 @@ class CephadmUpgrade:
              total_count=limit,
              remaining_count=limit,
          )
+        # Set OSD flags for the duration of the upgrade (unless --no-osd-flags was requested).
+        # If this fails, abort and clear upgrade_state so we don't leave a half-started upgrade around.
+        try:
+            if no_osd_flags:
+                # User passed --no-osd-flags, so we don't manage any flags for this upgrade.
+                self.upgrade_state.osd_flags = []
+                self._save_upgrade_state()
+            else:
+                self._set_osd_flags_for_upgrade()
+        except OrchestratorError as e:
+            self.mgr.log.error(f'Upgrade: failed to set OSD flags, aborting upgrade start: {e}')
+            self.upgrade_state = None
+            self._save_upgrade_state()
+            raise
          self._update_upgrade_progress(0.0)
          self._save_upgrade_state()
          self._clear_upgrade_health_checks()
@@ -487,6 +632,8 @@ class CephadmUpgrade:
          if self.upgrade_state.progress_id:
              self.mgr.remote('progress', 'complete',
                              self.upgrade_state.progress_id)
+        # Restore any OSD flags we temporarily set for this upgrade.
+        self._restore_osd_flags_after_upgrade()
          target_image = self.target_image
          self.mgr.log.info('Upgrade: Stopped')
          self.upgrade_state = None
@@ -1086,6 +1233,8 @@ class CephadmUpgrade:
          if not self.upgrade_state:
              logger.debug('_mark_upgrade_complete upgrade already marked complete, exiting')
              return
+        # Restore OSD flags before we clear the upgrade state.
+        self._restore_osd_flags_after_upgrade()
          logger.info('Upgrade: Complete!')
          if self.upgrade_state.progress_id:
              self.mgr.remote('progress', 'complete',
diff --git a/src/pybind/mgr/orchestrator/_interface.py b/src/pybind/mgr/orchestrator/_interface.py

index 136fde595ac05abde23e2a7785510544f07577e6..5c9ea15cc7425f6681a136e0819234c3ba0596bc 100644 (file)
--- a/src/pybind/mgr/orchestrator/_interface.py
+++ b/src/pybind/mgr/orchestrator/_interface.py
@@ -949,7 +949,7 @@ class Orchestrator(object):
          raise NotImplementedError()
  
      def upgrade_start(self, image: Optional[str], version: Optional[str], daemon_types: Optional[List[str]],
-                      hosts: Optional[str], services: Optional[List[str]], limit: Optional[int]) -> OrchResult[str]:
+                      hosts: Optional[str], services: Optional[List[str]], limit: Optional[int], no_osd_flags: bool = False) -> OrchResult[str]:
          raise NotImplementedError()
  
      def upgrade_pause(self) -> OrchResult[str]:
diff --git a/src/pybind/mgr/orchestrator/module.py b/src/pybind/mgr/orchestrator/module.py

index 5fc2fce63fae5501964671fd3f3264a62d2bff69..a471fc39505589e3148595f98043dff41e935464 100644 (file)
--- a/src/pybind/mgr/orchestrator/module.py
+++ b/src/pybind/mgr/orchestrator/module.py
@@ -2569,12 +2569,13 @@ Usage:
                         hosts: Optional[str] = None,
                         services: Optional[str] = None,
                         limit: Optional[int] = None,
+                       no_osd_flags: bool = False,
                         ceph_version: Optional[str] = None) -> HandleCommandResult:
          """Initiate upgrade"""
          self._upgrade_check_image_name(image, ceph_version)
          dtypes = daemon_types.split(',') if daemon_types is not None else None
          service_names = services.split(',') if services is not None else None
-        completion = self.upgrade_start(image, ceph_version, dtypes, hosts, service_names, limit)
+        completion = self.upgrade_start(image, ceph_version, dtypes, hosts, service_names, limit, no_osd_flags)
          raise_if_exception(completion)
          return HandleCommandResult(stdout=completion.result_str())
author	Ujjawal Anand <ujjawal.anand@ibm.com>
	Sun, 15 Feb 2026 19:41:41 +0000 (01:11 +0530)
committer	Ujjawal Anand <ujjawal.anand@ibm.com>
	Thu, 19 Feb 2026 12:15:01 +0000 (17:45 +0530)
doc/cephadm/upgrade.rst		patch \| blob \| history
src/pybind/mgr/cephadm/module.py		patch \| blob \| history
src/pybind/mgr/cephadm/upgrade.py		patch \| blob \| history
src/pybind/mgr/orchestrator/_interface.py		patch \| blob \| history
src/pybind/mgr/orchestrator/module.py		patch \| blob \| history