From: Nizamudeen A Date: Thu, 5 Feb 2026 10:42:47 +0000 (+0530) Subject: mgr: isolated CherryPy to prevent global state sharing X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fheads%2Fwip-nia-isolate-cherrypy;p=ceph-ci.git mgr: isolated CherryPy to prevent global state sharing As the modules are now loaded into the main interpreter (see https://github.com/ceph/ceph/pull/66244), CherryPy is hit by an issue where its global state affects all the modules that update the CherryPy config simultaneously in the same tree. So I am adding a CherryPyMgr which manages all the independent servers that will be created across all modules. This CherryPyMgr will create its own server instances by utilizing CherryPy's WSGI Server and eliminate the global state sharing. Each module or app can create its own tree and start an adapter which will open an independent server for that app. Fixes: https://tracker.ceph.com/issues/74643, https://tracker.ceph.com/issues/74543 Signed-off-by: Nizamudeen A --- diff --git a/.github/labeler.yml b/.github/labeler.yml index 932b2a23018..1dcdf585963 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -55,6 +55,7 @@ mgr: - src/pybind/mgr/ceph_module.pyi - src/pybind/mgr/mgr_module.py - src/pybind/mgr/mgr_util.py + - src/pybind/mgr/cherrypy_mgr.py - src/pybind/mgr/object_format.py - src/pybind/mgr/requirements.txt - src/pybind/mgr/tox.ini diff --git a/ceph.spec.in b/ceph.spec.in index 18cd57b2683..e9a3bac48dc 100644 --- a/ceph.spec.in +++ b/ceph.spec.in @@ -1920,6 +1920,7 @@ fi %{_datadir}/ceph/mgr/mgr_module.* %{_datadir}/ceph/mgr/mgr_util.* %{_datadir}/ceph/mgr/object_format.* +%{_datadir}/ceph/mgr/cherrypy_mgr.* %{_unitdir}/ceph-mgr@.service %{_unitdir}/ceph-mgr.target %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/mgr diff --git a/debian/ceph-mgr.install b/debian/ceph-mgr.install index 11a4a9ce4e2..803dcf6bcd8 100644 --- a/debian/ceph-mgr.install +++ 
b/debian/ceph-mgr.install @@ -3,3 +3,4 @@ usr/bin/ceph-mgr usr/share/ceph/mgr/mgr_module.* usr/share/ceph/mgr/mgr_util.* usr/share/ceph/mgr/object_format.* +usr/share/ceph/mgr/cherrypy_mgr.* diff --git a/src/pybind/mgr/CMakeLists.txt b/src/pybind/mgr/CMakeLists.txt index 9e900f859d7..b83ba1b387e 100644 --- a/src/pybind/mgr/CMakeLists.txt +++ b/src/pybind/mgr/CMakeLists.txt @@ -58,5 +58,5 @@ set(mgr_modules install(DIRECTORY ${mgr_modules} DESTINATION ${CEPH_INSTALL_DATADIR}/mgr ${mgr_module_install_excludes}) -install(FILES mgr_module.py mgr_util.py object_format.py +install(FILES mgr_module.py mgr_util.py object_format.py cherrypy_mgr.py DESTINATION ${CEPH_INSTALL_DATADIR}/mgr) diff --git a/src/pybind/mgr/cephadm/agent.py b/src/pybind/mgr/cephadm/agent.py index cec4ab0ab81..8c7111e5f49 100644 --- a/src/pybind/mgr/cephadm/agent.py +++ b/src/pybind/mgr/cephadm/agent.py @@ -1,11 +1,4 @@ -try: - import cherrypy - from cherrypy._cpserver import Server -except ImportError: - # to avoid sphinx build crash - class Server: # type: ignore - pass - +import cherrypy import json import logging import socket @@ -33,18 +26,6 @@ if TYPE_CHECKING: from cephadm.module import CephadmOrchestrator -def cherrypy_filter(record: logging.LogRecord) -> bool: - blocked = [ - 'TLSV1_ALERT_DECRYPT_ERROR' - ] - msg = record.getMessage() - return not any([m for m in blocked if m in msg]) - - -logging.getLogger('cherrypy.error').addFilter(cherrypy_filter) -cherrypy.log.access_log.propagate = False - - CEPHADM_AGENT_CERT_DURATION = (365 * 5) @@ -57,13 +38,21 @@ class AgentEndpoint: self.key_file: IO[bytes] self.cert_file: IO[bytes] - def configure_routes(self) -> None: - conf = {'/': {'tools.trailing_slash.on': False}} - - cherrypy.tree.mount(self.host_data, '/data', config=conf) - cherrypy.tree.mount(self.node_proxy_endpoint, '/node-proxy', config=conf) - - def configure_tls(self, server: Server) -> None: + def get_cherrypy_config(self) -> Dict: + config = { + '/': { + 'tools.trailing_slash.on': 
False + } + } + return config + + def configure_routes(self, config) -> List[tuple]: + return [ + (self.host_data, '/data', config), + (self.node_proxy_endpoint, '/node-proxy', config), + ] + + def configure_tls(self) -> Dict[str, str]: self.mgr.cert_mgr.register_self_signed_cert_key_pair(CephadmAgent.TYPE) tls_pair = self._get_agent_certificates() self.cert_file = tempfile.NamedTemporaryFile() @@ -75,7 +64,10 @@ class AgentEndpoint: self.key_file.flush() # pkey_tmp must not be gc'ed verify_tls_files(self.cert_file.name, self.key_file.name) - server.ssl_certificate, server.ssl_private_key = self.cert_file.name, self.key_file.name + return { + 'cert': self.cert_file.name, + 'key': self.key_file.name, + } def _get_agent_certificates(self) -> TLSCredentials: host = self.mgr.get_hostname() @@ -97,12 +89,14 @@ class AgentEndpoint: self.server_port += 1 self.mgr.log.error(f'Cephadm agent could not find free port in range {max_port - 150}-{max_port} and failed to start') - def configure(self) -> None: - self.host_data = HostData(self.mgr, self.server_port, self.server_addr) - self.configure_tls(self.host_data) + def configure(self, tree) -> None: + self.host_data = HostData(self.mgr) + ssl_info = self.configure_tls() self.node_proxy_endpoint = NodeProxyEndpoint(self.mgr) - self.configure_routes() + config = self.get_cherrypy_config() + mount_specs = self.configure_routes(config) self.find_free_port() + return config, ssl_info, mount_specs, (self.server_addr, self.server_port) class NodeProxyEndpoint: @@ -636,22 +630,11 @@ class NodeProxyEndpoint: return results -class HostData(Server): +class HostData: exposed = True - def __init__(self, mgr: "CephadmOrchestrator", port: int, host: str): + def __init__(self, mgr: "CephadmOrchestrator"): self.mgr = mgr - super().__init__() - self.socket_port = port - self.socket_host = host - self.subscribe() - - def stop(self) -> None: - # we must call unsubscribe before stopping the server, - # otherwise the port is not released and we 
will get - # an exception when trying to restart it - self.unsubscribe() - super().stop() @cherrypy.tools.allow(methods=['POST']) @cherrypy.tools.json_in() diff --git a/src/pybind/mgr/cephadm/http_server.py b/src/pybind/mgr/cephadm/http_server.py index baa00a3eb5a..68198e2a264 100644 --- a/src/pybind/mgr/cephadm/http_server.py +++ b/src/pybind/mgr/cephadm/http_server.py @@ -2,6 +2,8 @@ import cherrypy import threading import logging from typing import TYPE_CHECKING +from cherrypy import _cptree +from cherrypy_mgr import CherryPyMgr from cephadm.agent import AgentEndpoint from cephadm.services.service_discovery import ServiceDiscovery @@ -12,42 +14,20 @@ if TYPE_CHECKING: from cephadm.module import CephadmOrchestrator -def cherrypy_filter(record: logging.LogRecord) -> bool: - blocked = [ - 'TLSV1_ALERT_DECRYPT_ERROR' - ] - msg = record.getMessage() - return not any([m for m in blocked if m in msg]) - - -logging.getLogger('cherrypy.error').addFilter(cherrypy_filter) -cherrypy.log.access_log.propagate = False - - class CephadmHttpServer(threading.Thread): def __init__(self, mgr: "CephadmOrchestrator") -> None: self.mgr = mgr self.agent = AgentEndpoint(mgr) self.service_discovery = ServiceDiscovery(mgr) self.cherrypy_shutdown_event = threading.Event() + self.cherrypy_restart_event = threading.Event() self._service_discovery_port = self.mgr.service_discovery_port security_enabled, _, _ = self.mgr._get_security_config() self.security_enabled = security_enabled + self.agent_adapter = None + self.server_adapter = None super().__init__(target=self.run) - def configure_cherrypy(self) -> None: - cherrypy.config.update({ - 'environment': 'production', - 'engine.autoreload.on': False, - }) - - def configure(self) -> None: - self.configure_cherrypy() - self.agent.configure() - self.service_discovery.configure(self.mgr.service_discovery_port, - self.mgr.get_mgr_ip(), - self.security_enabled) - def config_update(self) -> None: self.service_discovery_port = 
self.mgr.service_discovery_port security_enabled, _, _ = self.mgr._get_security_config() @@ -76,27 +56,73 @@ class CephadmHttpServer(threading.Thread): self.restart() def restart(self) -> None: - cherrypy.engine.stop() - cherrypy.server.httpserver = None - self.configure() - cherrypy.engine.start() + self.cherrypy_restart_event.set() def run(self) -> None: + def start_servers(): + # start service discovery server + sd_config, sd_ssl_info = self.service_discovery.configure( + self.mgr.service_discovery_port, + self.mgr.get_mgr_ip(), + self.security_enabled + ) + sd_port = self._service_discovery_port + sd_ip = self.mgr.get_mgr_ip() + self.mgr.log.info(f'Starting service discovery server on {sd_ip}:{sd_port}...') + + sd_tree = _cptree.Tree() + sd_tree.mount(self.service_discovery, "/sd", config=sd_config) + adapter_sd, _ = CherryPyMgr.mount( + sd_tree, + (sd_ip, int(sd_port)), + ssl_info=sd_ssl_info + ) + + # start agent server + agent_config, agent_ssl_info, agent_mounts, bind_addr = self.agent.configure() + self.mgr.log.info(f'Starting agent server on {bind_addr[0]}:{bind_addr[1]}...') + + agent_tree = _cptree.Tree() + agent_tree.mount(self.agent, "/", config=agent_config) + + for app, path, conf in agent_mounts: + agent_tree.mount(app, path, config=conf) + + adapter_agent, _ = CherryPyMgr.mount( + agent_tree, + bind_addr, + ssl_info=agent_ssl_info + ) + return adapter_sd, adapter_agent + try: - self.mgr.log.debug('Starting cherrypy engine...') - self.configure() - cherrypy.server.unsubscribe() # disable default server - cherrypy.engine.start() - self.mgr.log.debug('Cherrypy engine started.') - self.mgr._kick_serve_loop() - # wait for the shutdown event - self.cherrypy_shutdown_event.wait() - self.cherrypy_shutdown_event.clear() - cherrypy.engine.stop() - cherrypy.server.httpserver = None - self.mgr.log.debug('Cherrypy engine stopped.') + self.server_adapter, self.agent_adapter = start_servers() + self.mgr.log.info('Cherrypy server started successfully.') except 
Exception as e: - self.mgr.log.error(f'Failed to run cephadm http server: {e}') + self.mgr.log.error(f'Failed to start cherrypy server: {e}') + if self.server_adapter: self.server_adapter.stop() + if self.agent_adapter: self.agent_adapter.stop() + return + + while not self.cherrypy_shutdown_event.is_set(): + if self.cherrypy_restart_event.wait(timeout=0.5): + self.cherrypy_restart_event.clear() + self.mgr.log.debug('Restarting cherrypy server...') + if self.server_adapter: + self.server_adapter.stop() + if self.agent_adapter: + self.agent_adapter.stop() + try: + self.server_adapter, self.agent_adapter = start_servers() + self.mgr.log.debug('Cherrypy server restarted successfully.') + except Exception as e: + self.mgr.log.error(f'Failed to restart cherrypy server: {e}') + continue + + if self.server_adapter: + self.server_adapter.stop() + if self.agent_adapter: + self.agent_adapter.stop() def shutdown(self) -> None: self.mgr.log.debug('Stopping cherrypy engine...') diff --git a/src/pybind/mgr/cephadm/services/service_discovery.py b/src/pybind/mgr/cephadm/services/service_discovery.py index 2c0478cb668..6d1d86837e7 100644 --- a/src/pybind/mgr/cephadm/services/service_discovery.py +++ b/src/pybind/mgr/cephadm/services/service_discovery.py @@ -1,11 +1,4 @@ -try: - import cherrypy - from cherrypy._cpserver import Server -except ImportError: - # to avoid sphinx build crash - class Server: # type: ignore - pass - +import cherrypy import logging import orchestrator # noqa @@ -28,16 +21,6 @@ if TYPE_CHECKING: from cephadm.module import CephadmOrchestrator -def cherrypy_filter(record: logging.LogRecord) -> bool: - blocked = [ - 'TLSV1_ALERT_DECRYPT_ERROR' - ] - msg = record.getMessage() - return not any([m for m in blocked if m in msg]) - - -logging.getLogger('cherrypy.error').addFilter(cherrypy_filter) -cherrypy.log.access_log.propagate = False logger = logging.getLogger(__name__) @@ -61,28 +44,34 @@ class ServiceDiscovery: def validate_password(self, realm: str, username: 
str, password: str) -> bool: return (password == self.password and username == self.username) + + def get_cherrypy_config(self, enable_auth: bool) -> Dict: + config = { + '/': { + 'request.dispatch': 4, + 'environment': 'production', + 'tools.gzip.on': True, + 'engine.autoreload.on': False, + } + } + if enable_auth: + config['/'].update({ + 'tools.auth_basic.on': True, + 'tools.auth_basic.realm': 'localhost', + 'tools.auth_basic.checkpassword': self.validate_password, + }) + return config - def configure_routes(self, server: Server, enable_auth: bool) -> None: + def configure_routes(self, root: 'Root') -> cherrypy.dispatch.RoutesDispatcher: ROUTES = [ - Route('index', '/', server.index), - Route('sd-config', '/prometheus/sd-config', server.get_sd_config), - Route('rules', '/prometheus/rules', server.get_prometheus_rules), + Route('index', '/', root.index), + Route('sd-config', '/prometheus/sd-config', root.get_sd_config), + Route('rules', '/prometheus/rules', root.get_prometheus_rules), ] d = cherrypy.dispatch.RoutesDispatcher() for route in ROUTES: d.connect(**route._asdict()) - if enable_auth: - conf = { - '/': { - 'request.dispatch': d, - 'tools.auth_basic.on': True, - 'tools.auth_basic.realm': 'localhost', - 'tools.auth_basic.checkpassword': self.validate_password - } - } - else: - conf = {'/': {'request.dispatch': d}} - cherrypy.tree.mount(None, '/sd', config=conf) + return d def enable_auth(self) -> None: self.username = self.mgr.get_store('service_discovery/root/username') @@ -93,7 +82,7 @@ class ServiceDiscovery: self.mgr.set_store('service_discovery/root/password', self.password) self.mgr.set_store('service_discovery/root/username', self.username) - def configure_tls(self, server: Server) -> None: + def configure_tls(self) -> Dict[str, str]: addr = self.mgr.get_mgr_ip() host = self.mgr.get_hostname() tls_pair = self.mgr.cert_mgr.generate_cert(host, addr, duration_in_days=CEPHADM_SVC_DISCOVERY_CERT_DURATION) @@ -106,40 +95,35 @@ class ServiceDiscovery: 
self.key_file.flush() # pkey_tmp must not be gc'ed verify_tls_files(self.cert_file.name, self.key_file.name) - - server.ssl_certificate, server.ssl_private_key = self.cert_file.name, self.key_file.name + return { + 'cert': self.cert_file.name, + 'key': self.key_file.name, + } def configure(self, port: int, addr: str, enable_security: bool) -> None: # we create a new server to enforce TLS/SSL config refresh - self.root_server = Root(self.mgr, port, addr) + self.root_server = Root(self.mgr) self.root_server.ssl_certificate = None self.root_server.ssl_private_key = None + ssl_info = None if enable_security: self.enable_auth() - self.configure_tls(self.root_server) - self.configure_routes(self.root_server, enable_security) + ssl_info = self.configure_tls() + config = self.get_cherrypy_config(enable_security) + dispatcher = self.configure_routes(self.root_server) + config['/'].update({'request.dispatch': dispatcher}) + return config, ssl_info -class Root(Server): +class Root: # collapse everything to '/' def _cp_dispatch(self, vpath: str) -> 'Root': cherrypy.request.path = '' return self - def stop(self) -> None: - # we must call unsubscribe before stopping the server, - # otherwise the port is not released and we will get - # an exception when trying to restart it - self.unsubscribe() - super().stop() - - def __init__(self, mgr: "CephadmOrchestrator", port: int = 0, host: str = ''): + def __init__(self, mgr: "CephadmOrchestrator"): self.mgr = mgr - super().__init__() - self.socket_port = port - self.socket_host = host - self.subscribe() @cherrypy.expose def index(self) -> str: diff --git a/src/pybind/mgr/cherrypy_mgr.py b/src/pybind/mgr/cherrypy_mgr.py new file mode 100644 index 00000000000..b4a83f67a60 --- /dev/null +++ b/src/pybind/mgr/cherrypy_mgr.py @@ -0,0 +1,122 @@ +""" +CherryPyMgr is a utility class to encapsulate the CherryPy server instance +into a standalone component. 
Unlike standard cherrypy which relies on global state +and a single engine, CherryPyMgr allows for multiple independent server instances +to be created and managed within the same process. So we can run multiple servers +in each modules without worrying about their global state interfering with each other. + +Usage: + # Create a tree and mount your WSGI app on it + from cherrypy import _cptree + tree = _cptree.Tree() + tree.mount(my_wsgi_app, config=config) + + # Mount your WSGI app on the manager + adapter, app = CherryPyMgr.mount( + tree, + addr, + ssl_info={'cert': 'path/to/cert.pem', 'key': 'path/to/key.pem', 'context': ssl_context} + ) + + # The adapter can be used to stop the server when needed + adapter.stop() +""" +import logging +import cherrypy +from cherrypy.process.servers import ServerAdapter +from cheroot.wsgi import Server as WSGIServer +from cheroot.ssl.builtin import BuiltinSSLAdapter +from cherrypy._cptree import Tree +from typing import Any, Tuple, Optional, Dict + +logger = logging.getLogger(__name__) + + +def cherrypy_filter(record: logging.LogRecord) -> bool: + blocked = [ + 'TLSV1_ALERT_DECRYPT_ERROR' + ] + msg = record.getMessage() + return not any([m for m in blocked if m in msg]) + + +class CherryPyMgr: + @classmethod + def mount( + cls, + tree: Tree, + bind_addr: Tuple[str, int], + ssl_info: Optional[Dict[str, Any]] = None, + conf: Optional[Dict[str, Any]] = None + ) -> Tuple[ServerAdapter, Any]: + """ + :param bind_addr: Tuple (host, port) + :param ssl_info: Dict containing {'cert': path, 'key': path, 'context': ssl_context} + :param conf: Optional CherryPy config dict for the mounted app + """ + if not hasattr(cherrypy, '_mgr_engine_started'): + if hasattr(cherrypy, 'server'): + cherrypy.server.unsubscribe() + if hasattr(cherrypy.engine, 'autoreload'): + cherrypy.engine.autoreload.unsubscribe() + if hasattr(cherrypy.engine, 'signal_handler'): + cherrypy.engine.signal_handler.unsubscribe() + + cherrypy.config.update({ + 
'engine.autoreload.on': False, + 'checker.on': False, + 'tools.log_headers.on': False, + 'log.screen': False + }) + try: + cherrypy.engine.start() + cherrypy._mgr_engine_started = True + logger.info('Cherrypy engine started successfully.') + except Exception as e: + logger.error(f'Failed to start cherrypy engine: {e}') + raise e + + cls.configure_logging() + adapter = cls.create_adapter(tree, bind_addr, ssl_info) + cls.subscribe_adapter(adapter) + adapter.start() + + return adapter, tree + + @staticmethod + def configure_logging() -> None: + cherrypy.log.access_log.propagate = False + cherrypy.log.error_log.propagate = False + + error_log = logging.getLogger('cherrypy.error') + + # make sure we only add the filter once + has_filter = any(f.__name__ == 'cherrypy_filter' for f in error_log.filters if hasattr(f, '__name__')) + if not has_filter: + error_log.addFilter(cherrypy_filter) + + @staticmethod + def create_adapter( + app: Any, + bind_addr: Tuple[str, int], + ssl_info: Optional[Dict[str, Any]] = None, + ) -> ServerAdapter: + server = WSGIServer( + bind_addr=bind_addr, + wsgi_app=app, + numthreads=30, + server_name='Ceph-Mgr' + ) + + if ssl_info: + adapter = BuiltinSSLAdapter(ssl_info['cert'], ssl_info['key']) + if ssl_info.get('context'): + adapter.context = ssl_info['context'] + server.ssl_adapter = adapter + + adapter = ServerAdapter(cherrypy.engine, server, bind_addr) + return adapter + + @staticmethod + def subscribe_adapter(adapter: ServerAdapter) -> None: + adapter.subscribe() diff --git a/src/pybind/mgr/dashboard/module.py b/src/pybind/mgr/dashboard/module.py index 9ce6d12ea50..a1eeb7e8810 100644 --- a/src/pybind/mgr/dashboard/module.py +++ b/src/pybind/mgr/dashboard/module.py @@ -14,6 +14,7 @@ import threading import time from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple from urllib.parse import urlparse +from cherrypy import _cptree from .controllers.multi_cluster import MultiCluster @@ -28,6 +29,7 @@ from mgr_module import 
CLIReadCommand, CLIWriteCommand, HandleCommandResult, \ MgrModule, MgrStandbyModule, NotifyType, Option, _get_localized_key from mgr_util import ServerConfigException, build_url, \ create_self_signed_cert, get_default_addr, verify_tls_files +from cherrypy_mgr import CherryPyMgr from . import mgr from .controllers import nvmeof # noqa # pylint: disable=unused-import @@ -90,10 +92,32 @@ class CherryPyConfig(object): def url_prefix(self): return self._url_prefix - @staticmethod - def update_cherrypy_config(config): - PLUGIN_MANAGER.hook.configure_cherrypy(config=config) - cherrypy.config.update(config) + def update_cherrypy_config(self, config): + if '/' not in config: + config['/'] = {} + + defaults = { + 'response.headers.server': 'Ceph-Dashboard', + 'response.headers.content-security-policy': "frame-ancestors 'self';", + 'response.headers.x-content-type-options': 'nosniff', + 'response.headers.strict-transport-security': 'max-age=63072000; includeSubDomains; preload', + 'engine.autoreload.on': False, + 'tools.request_logging.on': True, + 'tools.gzip.on': True, + 'tools.gzip.mime_types': [ + 'text/html', 'text/plain', 'application/json', + 'application/*+json', 'application/javascript', 'text/css' + ], + 'tools.json_in.on': True, + 'tools.json_in.force': True, + 'tools.plugin_hooks_filter_request.on': True, + 'error_page.default': json_error_page, + 'tools.sessions.on': True + } + + config['/'].update(defaults) + PLUGIN_MANAGER.hook.configure_cherrypy(config=config['/']) + config['/']['request.show_tracebacks'] = True # pylint: disable=too-many-branches def _configure(self): @@ -120,7 +144,8 @@ class CherryPyConfig(object): # Initialize custom handlers. 
cherrypy.tools.authenticate = AuthManagerTool() - configure_cors() + config = {'/': {}} + configure_cors(config) cherrypy.tools.plugin_hooks_filter_request = cherrypy.Tool( 'before_handler', lambda: PLUGIN_MANAGER.hook.filter_request_before_handler(request=cherrypy.request), @@ -129,31 +154,8 @@ class CherryPyConfig(object): cherrypy.tools.dashboard_exception_handler = HandlerWrapperTool(dashboard_exception_handler, priority=31) - cherrypy.log.access_log.propagate = False - cherrypy.log.error_log.propagate = False - - # Apply the 'global' CherryPy configuration. - config = { - 'engine.autoreload.on': False, - 'server.socket_host': server_addr, - 'server.socket_port': int(server_port), - 'error_page.default': json_error_page, - 'tools.request_logging.on': True, - 'tools.gzip.on': True, - 'tools.gzip.mime_types': [ - # text/html and text/plain are the default types to compress - 'text/html', 'text/plain', - # We also want JSON and JavaScript to be compressed - 'application/json', - 'application/*+json', - 'application/javascript', - 'text/css', - ], - 'tools.json_in.on': True, - 'tools.json_in.force': True, - 'tools.plugin_hooks_filter_request.on': True, - } + ssl_info = None if use_ssl: # SSL initialization cert = self.get_localized_store("crt") # type: ignore @@ -184,10 +186,11 @@ class CherryPyConfig(object): else: context.options |= ssl.OP_NO_TLSv1 | ssl.OP_NO_TLSv1_1 | ssl.OP_NO_TLSv1_2 - config['server.ssl_module'] = 'builtin' - config['server.ssl_certificate'] = cert_fname - config['server.ssl_private_key'] = pkey_fname - config['server.ssl_context'] = context + ssl_info = { + 'cert': cert_fname, + 'key': pkey_fname, + 'context': context + } self.update_cherrypy_config(config) @@ -202,7 +205,7 @@ class CherryPyConfig(object): port=server_port, ) uri = f'{base_url}{self.url_prefix}/' - return uri + return uri, (server_addr, server_port), ssl_info, config def await_configuration(self): """ @@ -213,7 +216,7 @@ class CherryPyConfig(object): """ while not 
self._stopping.is_set(): try: - uri = self._configure() + uri, bind_addr, ssl_info, config = self._configure() except ServerConfigException as e: self.log.info( # type: ignore "Config not ready to serve, waiting: {0}".format(e) @@ -222,7 +225,7 @@ class CherryPyConfig(object): self._stopping.wait(5) else: self.log.info("Configured CherryPy, starting engine...") # type: ignore - return uri + return uri, bind_addr, ssl_info, config if TYPE_CHECKING: @@ -291,6 +294,9 @@ class Module(MgrModule, CherryPyConfig): def __init__(self, *args, **kwargs): super(Module, self).__init__(*args, **kwargs) CherryPyConfig.__init__(self) + self.shutdown_event = threading.Event() + self.server_adapter = None + # configure the dashboard's crypto caller. by default it will # use the remote caller to avoid pyo3 conflicts choose_crypto_caller(str(self.get_module_option('crypto_caller', ''))) @@ -341,10 +347,10 @@ class Module(MgrModule, CherryPyConfig): AuthManager.initialize() load_sso_db() - uri = self.await_configuration() - if uri is None: - # We were shut down while waiting + conf_result = self.await_configuration() + if conf_result is None: return + uri, bind_addr, ssl_info, app_config = conf_result # Publish the URI that others may use to access the service we're # about to start serving @@ -353,16 +359,26 @@ class Module(MgrModule, CherryPyConfig): mapper, parent_urls = Router.generate_routes(self.url_prefix) config = {} + self.update_cherrypy_config(config) for purl in parent_urls: - config[purl] = { - 'request.dispatch': mapper - } - - cherrypy.tree.mount(None, config=config) + # Ensure the key exists + if purl not in config: + config[purl] = {} + config[purl]['request.dispatch'] = mapper + + logger.info('Starting ceph dashboard server at %s', uri) + + tree = _cptree.Tree() + tree.mount(None, f"{self.url_prefix}/", config=config) + self.server_adapter, _ = CherryPyMgr.mount( + tree, + bind_addr, + ssl_info=ssl_info, + conf=config + ) PLUGIN_MANAGER.hook.setup() - 
cherrypy.engine.start() NotificationQueue.start_queue() TaskManager.init() logger.info('Engine started.') @@ -379,15 +395,20 @@ class Module(MgrModule, CherryPyConfig): # wait for the shutdown event self.shutdown_event.wait() self.shutdown_event.clear() + if hasattr(self, 'server_adapter'): + self.server_adapter.stop() + self.server_adapter.unsubscribe() NotificationQueue.stop() - cherrypy.engine.stop() logger.info('Engine stopped') def shutdown(self): super(Module, self).shutdown() + self.shutdown_event.set() CherryPyConfig.shutdown(self) logger.info('Stopping engine...') - self.shutdown_event.set() + if hasattr(self, 'server_adapter'): + self.server_adapter.stop() + self.server_adapter.unsubscribe() def _set_ssl_item(self, item_label: str, item_key: 'SslConfigKey' = 'crt', mgr_id: Optional[str] = None, inbuf: Optional[str] = None): @@ -569,6 +590,7 @@ class StandbyModule(MgrStandbyModule, CherryPyConfig): super(StandbyModule, self).__init__(*args, **kwargs) CherryPyConfig.__init__(self) self.shutdown_event = threading.Event() + self.server_adapter = None # configure the dashboard's crypto caller. 
by default it will # use the remote caller to avoid pyo3 conflicts choose_crypto_caller(str(self.get_module_option('crypto_caller', ''))) @@ -578,10 +600,10 @@ class StandbyModule(MgrStandbyModule, CherryPyConfig): mgr.init(self) def serve(self): - uri = self.await_configuration() - if uri is None: - # We were shut down while waiting + conf_result = self.await_configuration() + if conf_result is None: return + uri, bind_addr, ssl_info, app_config = conf_result module = self @@ -629,19 +651,31 @@ class StandbyModule(MgrStandbyModule, CherryPyConfig): status = module.get_module_option('standby_error_status_code', 500) raise cherrypy.HTTPError(status, message="Keep on looking") - cherrypy.tree.mount(Root(), "{}/".format(self.url_prefix), {}) + standby_conf = {} + self.update_cherrypy_config(standby_conf) + + standby_tree = _cptree.Tree() + standby_tree.mount(Root(), f"{self.url_prefix}/", config=standby_conf) self.log.info("Starting engine...") - cherrypy.engine.start() + self.server_adapter, _ = CherryPyMgr.mount( + standby_tree, + bind_addr, + ssl_info=ssl_info + ) self.log.info("Engine started...") # Wait for shutdown event self.shutdown_event.wait() self.shutdown_event.clear() - cherrypy.engine.stop() + if hasattr(self, 'server_adapter'): + self.server_adapter.stop() + self.server_adapter.unsubscribe() self.log.info("Engine stopped.") def shutdown(self): - CherryPyConfig.shutdown(self) - self.log.info("Stopping engine...") self.shutdown_event.set() + CherryPyConfig.shutdown(self) + if hasattr(self, 'server_adapter'): + self.server_adapter.stop() + self.server_adapter.unsubscribe() self.log.info("Stopped engine...") diff --git a/src/pybind/mgr/dashboard/services/auth/auth.py b/src/pybind/mgr/dashboard/services/auth/auth.py index 7f1cdb5887c..b3f966ecffe 100644 --- a/src/pybind/mgr/dashboard/services/auth/auth.py +++ b/src/pybind/mgr/dashboard/services/auth/auth.py @@ -22,13 +22,6 @@ from ..access_control import LocalAuthenticator, UserDoesNotExist if TYPE_CHECKING: 
from dashboard.services.sso import SsoDB -cherrypy.config.update({ - 'response.headers.server': 'Ceph-Dashboard', - 'response.headers.content-security-policy': "frame-ancestors 'self';", - 'response.headers.x-content-type-options': 'nosniff', - 'response.headers.strict-transport-security': 'max-age=63072000; includeSubDomains; preload' -}) - class AuthType(str, Enum): LOCAL = 'local' diff --git a/src/pybind/mgr/dashboard/tools.py b/src/pybind/mgr/dashboard/tools.py index fbf1e26bf41..53d3d840b83 100644 --- a/src/pybind/mgr/dashboard/tools.py +++ b/src/pybind/mgr/dashboard/tools.py @@ -840,7 +840,7 @@ def merge_list_of_dicts_by_key(target_list: list, source_list: list, key: str): return target_list -def configure_cors(url: str = ''): +def configure_cors(config, url: str = ''): """ Allow CORS requests if the cross_origin_url option is set. """ @@ -850,11 +850,9 @@ def configure_cors(url: str = ''): else: cross_origin_url = mgr.get_localized_module_option('cross_origin_url', '') if cross_origin_url: - cherrypy.tools.CORS = cherrypy.Tool('before_handler', cors_tool) - config = { - 'tools.CORS.on': True, - } - cherrypy.config.update(config) + if not hasattr(cherrypy.tools, 'CORS'): + cherrypy.tools.CORS = cherrypy.Tool('before_handler', cors_tool) + config['/']['tools.CORS.on'] = True def cors_tool(): diff --git a/src/pybind/mgr/prometheus/module.py b/src/pybind/mgr/prometheus/module.py index 836f64aa2b7..2cd6d237b38 100644 --- a/src/pybind/mgr/prometheus/module.py +++ b/src/pybind/mgr/prometheus/module.py @@ -10,6 +10,8 @@ import enum from collections import namedtuple from collections import OrderedDict from tempfile import NamedTemporaryFile +from cherrypy_mgr import CherryPyMgr +from cherrypy import _cptree from mgr_module import CLIReadCommand, MgrModule, MgrStandbyModule, PG_STATES, Option, ServiceInfoT, HandleCommandResult, CLIWriteCommand from mgr_util import get_default_addr, profile_method, build_url, test_port_allocation, PortAlreadyInUse @@ -64,11 +66,6 @@ 
def _wait_for_port_available( return False -cherrypy.config.update({ - 'response.headers.server': 'Ceph-Prometheus' -}) - - def health_status_to_number(status: str) -> int: if status == 'HEALTH_OK': return 0 @@ -1959,7 +1956,7 @@ class Module(MgrModule, OrchestratorClientMixin): self.collect() self.get_file_sd_config() - def configure(self, server_addr: str, server_port: int) -> None: + def configure(self) -> None: cmd = {'prefix': 'orch get-security-config'} ret, out, _ = self.mon_command(cmd) @@ -1967,7 +1964,7 @@ class Module(MgrModule, OrchestratorClientMixin): try: security_config = json.loads(out) if security_config.get('security_enabled', False): - self.setup_tls_config(server_addr, server_port) + self.setup_tls_config() return except Exception as e: self.log.exception( @@ -1977,29 +1974,27 @@ class Module(MgrModule, OrchestratorClientMixin): ) # In any error fallback to plain http mode - self.setup_default_config(server_addr, server_port) - - def setup_default_config(self, server_addr: str, server_port: int) -> None: - cherrypy.config.update({ - 'server.socket_host': server_addr, - 'server.socket_port': server_port, - 'engine.autoreload.on': False, - 'server.ssl_module': None, - 'server.ssl_certificate': None, - 'server.ssl_private_key': None, - 'tools.gzip.on': True, - 'tools.gzip.mime_types': [ - 'text/plain', - 'text/html', - 'application/json', - ], - 'tools.gzip.compress_level': 6, - }) - # Publish the URI that others may use to access the service we're about to start serving - self.set_uri(build_url(scheme='http', host=self.get_server_addr(), - port=server_port, path='/')) + return self.setup_default_config() + + def get_cherrypy_config(self): + config = { + '/': { + 'response.headers.server': 'Ceph-Prometheus', + 'tools.gzip.on': True, + 'tools.gzip.mime_types': [ + 'text/plain', + 'text/html', + 'application/json', + ], + 'tools.gzip.compress_level': 6, + } + } + return config + + def setup_default_config(self) -> None: + return 
self.get_cherrypy_config(), None, 'http' - def setup_tls_config(self, server_addr: str, server_port: int) -> None: + def setup_tls_config(self) -> None: # Temporarily disabling the verify function due to issues. # Please check verify_tls_files below to more information. # from mgr_util import verify_tls_files @@ -2027,25 +2022,12 @@ class Module(MgrModule, OrchestratorClientMixin): # Re-enable once the issue is resolved. # verify_tls_files(self.cert_file.name, self.key_file.name) cert_file_path, key_file_path = self.cert_file.name, self.key_file.name + ssl_info = { + 'cert': cert_file_path, + 'key': key_file_path + } - cherrypy.config.update({ - 'server.socket_host': server_addr, - 'server.socket_port': server_port, - 'engine.autoreload.on': False, - 'server.ssl_module': 'builtin', - 'server.ssl_certificate': cert_file_path, - 'server.ssl_private_key': key_file_path, - 'tools.gzip.on': True, - 'tools.gzip.mime_types': [ - 'text/plain', - 'text/html', - 'application/json', - ], - 'tools.gzip.compress_level': 6, - }) - # Publish the URI that others may use to access the service we're about to start serving - self.set_uri(build_url(scheme='https', host=self.get_server_addr(), - port=server_port, path='/')) + return self.get_cherrypy_config(), ssl_info, 'https' def serve(self) -> None: @@ -2124,36 +2106,39 @@ class Module(MgrModule, OrchestratorClientMixin): if self.stale_cache_strategy not in [self.STALE_CACHE_FAIL, self.STALE_CACHE_RETURN]: self.stale_cache_strategy = self.STALE_CACHE_FAIL - - server_addr = cast(str, self.get_localized_module_option('server_addr', get_default_addr())) - server_port = cast(int, self.get_localized_module_option('server_port', DEFAULT_PORT)) - self.log.info( - "server_addr: %s server_port: %s" % - (server_addr, server_port) - ) - + self.cache = cast(bool, self.get_localized_module_option('cache', True)) if self.cache: self.log.info('Cache enabled') self.metrics_thread.start() else: self.log.info('Cache disabled') + def start_server(): + 
server_addr = cast(str, self.get_localized_module_option('server_addr', get_default_addr())) + server_port = cast(int, self.get_localized_module_option('server_port', DEFAULT_PORT)) + + config, ssl_info, scheme = self.configure() + tree = _cptree.Tree() + tree.mount(Root(), "/", config=config) + + # Wait for port to be available before starting (handles standby->active transition) + if not _wait_for_port_available(self.log, server_addr, server_port): + self.log.warning(f'Port {server_port} still in use after waiting, attempting to start anyway') + + self.log.info(f'Starting prometheus server on {server_addr}:{server_port}') + adapter, _ = CherryPyMgr.mount( + tree, + (server_addr, int(server_port)), + ssl_info=ssl_info + ) + self.set_uri(build_url(scheme=scheme, host=self.get_server_addr(), port=server_port, path='/')) + return adapter - self.configure(server_addr, server_port) - - cherrypy.tree.mount(Root(), "/") - - # Wait for port to be available before starting (handles standby->active transition) - if not _wait_for_port_available(self.log, server_addr, server_port): - self.log.warning(f'Port {server_port} still in use after waiting, attempting to start anyway') - self.log.info('Starting engine...') try: - cherrypy.engine.start() + self.server_adapter = start_server() except Exception as e: - self.log.error(f'Failed to start engine: {e}') + self.log.error(f'Failed to start Prometheus: {e}') return - self.log.info('Engine started.') - # Main event loop: handle both shutdown and config change events while True: # Wait for either shutdown or config change event (check every 0.5s) @@ -2169,33 +2154,23 @@ class Module(MgrModule, OrchestratorClientMixin): self.config_change_event.clear() self.log.info('Restarting engine due to config change...') - # https://stackoverflow.com/questions/7254845/change-cherrypy-port-and-restart-web-server - # if we omit the line: cherrypy.server.httpserver = None - # then the cherrypy server is not restarted correctly - 
cherrypy.engine.stop() - cherrypy.server.httpserver = None - - # Re-read configuration - server_addr = cast(str, self.get_localized_module_option('server_addr', get_default_addr())) - server_port = cast(int, self.get_localized_module_option('server_port', DEFAULT_PORT)) - self.configure(server_addr, server_port) - - # Wait for port to be available before starting - if not _wait_for_port_available(self.log, server_addr, server_port): - self.log.warning(f'Port {server_port} still in use after waiting, attempting to start anyway') + if hasattr(self, 'server_adapter'): + self.server_adapter.stop() + self.server_adapter.unsubscribe() try: - cherrypy.engine.start() - self.log.info('Engine restarted.') + self.server_adapter = start_server() + self.log.debug('Prometheus restarted successfully.') except Exception as e: - self.log.error(f'Failed to restart engine: {e}') + self.log.error(f'Failed to restart Prometheus: {e}') # Cleanup on shutdown self.shutdown_event.clear() # tell metrics collection thread to stop collecting new metrics self.metrics_thread.stop() - cherrypy.engine.stop() - cherrypy.server.httpserver = None + if hasattr(self, 'server_adapter'): + self.server_adapter.stop() + self.server_adapter.unsubscribe() self.log.info('Engine stopped.') self.shutdown_rbd_stats() # wait for the metrics collection thread to stop @@ -2252,12 +2227,6 @@ class StandbyModule(MgrStandbyModule): 'server_port', DEFAULT_PORT)) self.log.info("server_addr: %s server_port: %s" % (server_addr, server_port)) - cherrypy.config.update({ - 'server.socket_host': server_addr, - 'server.socket_port': server_port, - 'engine.autoreload.on': False, - 'request.show_tracebacks': False - }) module = self @@ -2283,23 +2252,38 @@ class StandbyModule(MgrStandbyModule): def metrics(self) -> str: return '' - cherrypy.tree.mount(Root(), '/', {}) + config = { + '/': { + 'response.headers.server': 'Ceph-Prometheus', + 'engine.autoreload.on': False, + } + } + tree = _cptree.Tree() + tree.mount(Root(), '/', 
config=config) # Wait for port to be available before starting if not _wait_for_port_available(self.log, server_addr, server_port): self.log.warning(f'Port {server_port} still in use after waiting, attempting to start anyway') self.log.info('Starting engine...') - cherrypy.engine.start() + self.server_adapter, _ = CherryPyMgr.mount( + tree, + (server_addr, int(server_port)), + conf=config, + ) self.log.info('Engine started.') # Wait for shutdown event self.shutdown_event.wait() self.shutdown_event.clear() - cherrypy.engine.stop() - cherrypy.server.httpserver = None + if hasattr(self, 'server_adapter'): + self.server_adapter.stop() + self.server_adapter.unsubscribe() self.log.info('Engine stopped.') def shutdown(self) -> None: self.log.info("Stopping engine...") - self.shutdown_event.set() - self.log.info("Stopped engine") + if hasattr(self, 'shutdown_event'): + self.shutdown_event.set() + if hasattr(self, 'server_adapter'): + self.server_adapter.stop() + self.server_adapter.unsubscribe() diff --git a/src/pybind/mgr/tox.ini b/src/pybind/mgr/tox.ini index c2deb627261..5cb6fffe11e 100644 --- a/src/pybind/mgr/tox.ini +++ b/src/pybind/mgr/tox.ini @@ -150,6 +150,7 @@ modules = localpool \ mgr_module.py \ mgr_util.py \ + cherrypy_mgr.py \ nfs \ object_format.py \ orchestrator \ @@ -198,7 +199,7 @@ modules = smb [isort] profile = black line_length = 78 -known_first_party = ceph,rados,rbd,cephfs,mgr,mgr_module,mgr_util,object_format +known_first_party = ceph,rados,rbd,cephfs,mgr,mgr_module,mgr_util,object_format,cherrypy_mgr known_typing = typing sections = FUTURE,TYPING,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER