From 062ec6feddec4cc326f89c63c6b87ea9e0c3bd91 Mon Sep 17 00:00:00 2001 From: Pedro Gonzalez Gomez Date: Thu, 7 May 2026 21:36:32 +0200 Subject: [PATCH] mgr: add prerequisites check before enabling dashboard oauth2 sso Assisted-by: Claude:claude-4.6-sonnet Fixes: https://tracker.ceph.com/issues/76476 Signed-off-by: Pedro Gonzalez Gomez --- PendingReleaseNotes | 1 + doc/cephadm/services/oauth2-proxy.rst | 2 +- doc/mgr/dashboard.rst | 23 +++++++++++++ .../mgr/cephadm/services/oauth2_proxy.py | 22 ++++++++++++- src/pybind/mgr/dashboard/module.py | 21 ++++++++++-- .../mgr/dashboard/services/auth/oauth2.py | 5 +-- src/pybind/mgr/dashboard/services/sso.py | 32 ++++++++++++++----- 7 files changed, 91 insertions(+), 15 deletions(-) diff --git a/PendingReleaseNotes b/PendingReleaseNotes index 8a691de69271..f7f8d7b136f5 100644 --- a/PendingReleaseNotes +++ b/PendingReleaseNotes @@ -294,6 +294,7 @@ * Dashboard: RGW Topics and Bucket Notification Management * Dashboard: RGW granular bucket replication * Dashboard: SMB monitoring and management +* Dashboard: `ceph dashboard sso enable oauth2` now checks that the `oauth2-proxy` service is running before enabling SSO. If the `oauth2-proxy` service goes down, cephadm will automatically disable Dashboard OAuth2 SSO. * Monitoring: New monitoring Dashboards for Application, NVMe, CephFS, and SMB Overview * RGW: Introduce the `rgw_usage_log_key_transition` configuration option to handle the co-existence of old and new usage log keys. This option is enabled by default diff --git a/doc/cephadm/services/oauth2-proxy.rst b/doc/cephadm/services/oauth2-proxy.rst index 4f0ab2e84172..f0c3a9c1209a 100644 --- a/doc/cephadm/services/oauth2-proxy.rst +++ b/doc/cephadm/services/oauth2-proxy.rst @@ -119,7 +119,7 @@ A non-exhaustive list of important limitations for the `oauth2-proxy` service fo * High-availability configurations for `oauth2-proxy` itself are not supported. 
* Proper configuration of the IDP and OAuth2 parameters is crucial to avoid authentication failures. Misconfigurations can lead to access issues. * IDP must include the jti claim in the issued JWT token because the Ceph Dashboard relies on this value to verify the token's validity. - +* If the `oauth2-proxy` service goes down, cephadm will automatically disable Dashboard OAuth2 SSO. Container images ~~~~~~~~~~~~~~~~ diff --git a/doc/mgr/dashboard.rst b/doc/mgr/dashboard.rst index ca8ccff886ce..bbe30a6259ef 100644 --- a/doc/mgr/dashboard.rst +++ b/doc/mgr/dashboard.rst @@ -822,6 +822,29 @@ To enable SSO: ceph dashboard sso enable oauth2 +Automatic SSO Disable on Service Failure +""""""""""""""""""""""""""""""""""""""""" + +OAuth2 SSO depends on the ``oauth2-proxy`` service being active. +Cephadm continuously monitors this service and will **automatically disable OAuth2 SSO** +in the Dashboard if the service goes down or is removed. +A warning is recorded in the Ceph log when this happens: + +.. code-block:: text + + OAuth2 SSO has been automatically disabled because oauth2-proxy is no longer running. + +Once the service is back up and running, SSO must be **re-enabled manually**: + +.. prompt:: bash $ + + ceph dashboard sso enable oauth2 + +.. note:: + + Enabling OAuth2 SSO is blocked if cephadm detects that ``oauth2-proxy`` is not currently running. Deploy and start this service before + attempting to enable SSO. + .. 
_dashboard-alerting: Enabling Prometheus Alerting diff --git a/src/pybind/mgr/cephadm/services/oauth2_proxy.py b/src/pybind/mgr/cephadm/services/oauth2_proxy.py index 66d53bbe9dd5..3cbc03e6d11c 100644 --- a/src/pybind/mgr/cephadm/services/oauth2_proxy.py +++ b/src/pybind/mgr/cephadm/services/oauth2_proxy.py @@ -2,7 +2,7 @@ import logging from typing import List, Any, Tuple, Dict, cast, Optional, TYPE_CHECKING from copy import copy -from orchestrator import DaemonDescription +from orchestrator import DaemonDescription, DaemonDescriptionStatus from ceph.deployment.service_spec import OAuth2ProxySpec, MgmtGatewaySpec, ServiceSpec from cephadm.services.cephadmservice import CephadmService, CephadmDaemonDeploySpec from .service_registry import register_cephadm_service @@ -83,3 +83,23 @@ class OAuth2ProxyService(CephadmService): } return daemon_config, sorted(OAuth2ProxyService.get_dependencies(self.mgr)) + + def post_remove(self, daemon: DaemonDescription, is_failed_deploy: bool) -> None: + """ + Called after an oauth2-proxy daemon is removed. + Disables Dashboard OAuth2 SSO if no other oauth2-proxy daemons remain running. 
+ """ + remaining_running = any( + dd.status == DaemonDescriptionStatus.running and dd.name() != daemon.name() + for dd in self.mgr.cache.get_daemons_by_service('oauth2-proxy') + ) + if not remaining_running: + try: + if 'dashboard' in self.mgr.get('mgr_map')['modules'] \ + and self.mgr.get_module_option_ex('dashboard', 'sso_oauth2'): + logger.warning('Last oauth2-proxy daemon removed, disabling Dashboard OAuth2 SSO') + self.mgr.remote('dashboard', 'disable_oauth2_sso') + except Exception as e: + logger.warning('Failed to disable OAuth2 SSO after oauth2-proxy removal: %s', e) + + super().post_remove(daemon, is_failed_deploy=is_failed_deploy) diff --git a/src/pybind/mgr/dashboard/module.py b/src/pybind/mgr/dashboard/module.py index 1f5798b41f2a..fffa768575be 100644 --- a/src/pybind/mgr/dashboard/module.py +++ b/src/pybind/mgr/dashboard/module.py @@ -34,11 +34,10 @@ from mgr_util import ServerConfigException, build_url, \ from . import mgr from .cli import DBCLICommand -from .controllers import nvmeof # noqa # pylint: disable=unused-import -from .controllers import Router, json_error_page +from .controllers import Router, json_error_page, nvmeof # noqa # pylint: disable=unused-import from .grafana import push_local_dashboards from .services import nvmeof_cli, nvmeof_top_cli # noqa # pylint: disable=unused-import -from .services.auth import AuthManager, AuthManagerTool, JwtManager +from .services.auth import AuthManager, AuthManagerTool, AuthType, JwtManager from .services.exception import dashboard_exception_handler from .services.nvmeof_top_cli import NvmeofTopCollector from .services.service import RgwServiceManager @@ -581,6 +580,22 @@ class Module(MgrModule, CherryPyConfig): return (-errno.EINVAL, '', 'Command not found \'{0}\'' .format(cmd['prefix'])) + def disable_oauth2_sso(self) -> None: + """ + Disable OAuth2 SSO if it is currently active. + Called remotely by cephadm when oauth2-proxy goes down. 
+ """ + if not self.get_module_option('sso_oauth2', False): + return + load_sso_db() + mgr.SSO_DB.protocol = AuthType.LOCAL + mgr.SSO_DB.save() + self.set_module_option('sso_oauth2', False) + self.log.warning( + 'OAuth2 SSO has been automatically disabled because ' + 'oauth2-proxy is no longer running.' + ) + def notify(self, notify_type: NotifyType, notify_id): NotificationQueue.new_notification(str(notify_type), notify_id) diff --git a/src/pybind/mgr/dashboard/services/auth/oauth2.py b/src/pybind/mgr/dashboard/services/auth/oauth2.py index d7ffabb57238..70564028bbb2 100644 --- a/src/pybind/mgr/dashboard/services/auth/oauth2.py +++ b/src/pybind/mgr/dashboard/services/auth/oauth2.py @@ -130,7 +130,8 @@ class OAuth2(SSOAuth): raise cherrypy.HTTPError() try: user = mgr.ACCESS_CTRL_DB.create_user( - jwt_payload['sub'], None, jwt_payload.get('name', None), jwt_payload.get('email', None)) + jwt_payload['sub'], None, + jwt_payload.get('name', None), jwt_payload.get('email', None)) except UserAlreadyExists: logger.debug("User already exists") user = mgr.ACCESS_CTRL_DB.get_user(jwt_payload['sub']) @@ -156,7 +157,7 @@ class OAuth2(SSOAuth): payload = decode_jwt_segment(token.split(".")[1]) return time.time() > payload.get('exp', 0) except Exception: - return True + raise cherrypy.HTTPError(500, 'Failed to verify session') @classmethod def get_token_iss(cls, token=''): diff --git a/src/pybind/mgr/dashboard/services/sso.py b/src/pybind/mgr/dashboard/services/sso.py index bb555d7dd347..2e614481c76e 100644 --- a/src/pybind/mgr/dashboard/services/sso.py +++ b/src/pybind/mgr/dashboard/services/sso.py @@ -97,6 +97,22 @@ def load_sso_db(): @DBCLICommand.Write("dashboard sso enable oauth2") def enable_sso(_, roles_path: Optional[str] = None): + if mgr.get_module_option_ex('orchestrator', 'orchestrator') == 'cephadm': + from orchestrator import DaemonDescriptionStatus + + from .orchestrator import OrchClient + orch = OrchClient.instance() + if orch.available(): + daemons = 
orch.services.list_daemons(daemon_type='oauth2-proxy') + oauth2_proxy_running = any( + d.status == DaemonDescriptionStatus.running for d in daemons + ) + if not oauth2_proxy_running: + return HandleCommandResult( + retval=-errno.EPERM, + stderr='OAuth2 SSO prerequisite not met: ' + 'oauth2-proxy service must be deployed and running.' + ) mgr.SSO_DB.protocol = AuthType.OAUTH2 if jmespath and roles_path: try: @@ -160,8 +176,11 @@ def handle_sso_command(cmd): 'dashboard sso setup saml2']: return -errno.ENOSYS, '', '' - if not python_saml_imported: - return -errno.EPERM, '', 'Required library not found: `python3-saml`' + if cmd['prefix'] == 'dashboard sso status': + if not mgr.SSO_DB.protocol == AuthType.LOCAL: + return 0, f'SSO is "enabled" with "{mgr.SSO_DB.protocol.name}" protocol.', '' + + return 0, 'SSO is "disabled".', '' if cmd['prefix'] == 'dashboard sso disable': mgr.SSO_DB.protocol = AuthType.LOCAL @@ -169,6 +188,9 @@ def handle_sso_command(cmd): mgr.set_module_option('sso_oauth2', False) return 0, 'SSO is "disabled".', '' + if not python_saml_imported: + return -errno.EPERM, '', 'Required library not found: `python3-saml`' + if cmd['prefix'] == 'dashboard sso enable saml2': configured = _is_sso_configured() if configured: @@ -178,12 +200,6 @@ def handle_sso_command(cmd): return -errno.EPERM, '', 'Single Sign-On is not configured: ' \ 'use `ceph dashboard sso setup saml2`' - if cmd['prefix'] == 'dashboard sso status': - if not mgr.SSO_DB.protocol == AuthType.LOCAL: - return 0, f'SSO is "enabled" with "{mgr.SSO_DB.protocol}" protocol.', '' - - return 0, 'SSO is "disabled".', '' - if cmd['prefix'] == 'dashboard sso show saml2': return 0, json.dumps(mgr.SSO_DB.config.to_dict()), '' -- 2.47.3