]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr: isolated CherryPy to prevent global state sharing 67227/head
authorNizamudeen A <nia@redhat.com>
Thu, 5 Feb 2026 10:42:47 +0000 (16:12 +0530)
committerNizamudeen A <nia@redhat.com>
Thu, 26 Feb 2026 03:10:39 +0000 (08:40 +0530)
As the modules are now being loaded into the main interpreter (see
https://github.com/ceph/ceph/pull/66244), cherrypy is hitting an issue
where its global state is shared by all the modules, so modules updating
the cherrypy config simultaneously in the same tree affect one another.

So I am adding a CherryPyMgr which manages all the independent servers
that will be created across all modules. This CherryPyMgr will create
its own server instances by utilizing cherrypy's WSGI Server, which
eliminates the global state sharing. Each module or app can create its
own tree and start an adapter which will open an independent server for
that app.

- Also added a method to update the config in place so CORS URLs can be
  configured without restarting servers.

Fixes: https://tracker.ceph.com/issues/74643, https://tracker.ceph.com/issues/74543, https://tracker.ceph.com/issues/74980
Signed-off-by: Nizamudeen A <nia@redhat.com>
17 files changed:
.github/labeler.yml
ceph.spec.in
debian/ceph-mgr.install
doc/_ext/ceph_commands.py
qa/tasks/mgr/test_prometheus.py
src/pybind/mgr/CMakeLists.txt
src/pybind/mgr/cephadm/agent.py
src/pybind/mgr/cephadm/http_server.py
src/pybind/mgr/cephadm/services/service_discovery.py
src/pybind/mgr/cephadm/tests/test_service_discovery.py
src/pybind/mgr/cherrypy_mgr.py [new file with mode: 0644]
src/pybind/mgr/dashboard/module.py
src/pybind/mgr/dashboard/services/auth/auth.py
src/pybind/mgr/dashboard/tools.py
src/pybind/mgr/prometheus/module.py
src/pybind/mgr/tests/test_cherrypy_mgr.py [new file with mode: 0644]
src/pybind/mgr/tox.ini

index 932b2a23018c7a3a35e75021607712ae96f6036a..1dcdf585963a0eb55a738cf82e9a2f9282e5dba8 100644 (file)
@@ -55,6 +55,7 @@ mgr:
   - src/pybind/mgr/ceph_module.pyi
   - src/pybind/mgr/mgr_module.py
   - src/pybind/mgr/mgr_util.py
+  - src/pybind/mgr/cherrypy_mgr.py
   - src/pybind/mgr/object_format.py
   - src/pybind/mgr/requirements.txt
   - src/pybind/mgr/tox.ini
index 18cd57b26833e0181627f59ff6f3c901c0b6d298..e9a3bac48dc5f16a4437e6402c044e693877f46c 100644 (file)
@@ -1920,6 +1920,7 @@ fi
 %{_datadir}/ceph/mgr/mgr_module.*
 %{_datadir}/ceph/mgr/mgr_util.*
 %{_datadir}/ceph/mgr/object_format.*
+%{_datadir}/ceph/mgr/cherrypy_mgr.*
 %{_unitdir}/ceph-mgr@.service
 %{_unitdir}/ceph-mgr.target
 %attr(750,ceph,ceph) %dir %{_localstatedir}/lib/ceph/mgr
index 11a4a9ce4e2adf6aaf69ba8dcf1191736f2791bc..803dcf6bcd8f7673619a0d12a200e2cec3c59427 100644 (file)
@@ -3,3 +3,4 @@ usr/bin/ceph-mgr
 usr/share/ceph/mgr/mgr_module.*
 usr/share/ceph/mgr/mgr_util.*
 usr/share/ceph/mgr/object_format.*
+usr/share/ceph/mgr/cherrypy_mgr.*
index 1440c6e2a064bd94898ca8ecfe111d6f067f84cf..f86ad94f4cc6b4c5875de39106b0876a9c3add14 100644 (file)
@@ -285,6 +285,14 @@ class CephMgrCommands(Directive):
         # make diskprediction_local happy
         mock_imports += ['numpy',
                          'scipy']
+        # make cephadm happy
+        mock_imports += ['cherrypy.process',
+                         'cherrypy.process.servers',
+                         'cherrypy._cptree',
+                         'cheroot',
+                         'cheroot.wsgi',
+                         'cheroot.ssl',
+                         'cheroot.ssl.builtin']
 
         for m in mock_imports:
             args = {}
index 376556ab30536e925b38d8e2286cc2a2aa7787d6..448868c13b34eb0b38c268d79f306feb1b7f18df 100644 (file)
@@ -54,7 +54,7 @@ class TestPrometheus(MgrTestCase):
         self._assign_ports("prometheus", "server_port")
         self._load_module("prometheus")
 
-        base_uri = self._get_uri("prometheus")
+        base_uri = self._get_uri("prometheus").rstrip("/")
 
         # This is a very simple smoke test to check that the module can
         # give us a 200 response to requests.  We're not testing that
@@ -72,8 +72,8 @@ class TestPrometheus(MgrTestCase):
             if r.status_code != 200:
                 failures.append(url)
 
-            log.info("{0}: {1} ({2} bytes)".format(
-                url, r.status_code, len(r.content)
+            log.info("{0}: {1} ({2} bytes)- Content: {3}".format(
+                url, r.status_code, len(r.content), r.text[:500]
             ))
 
         self.assertListEqual(failures, [])
index 9e900f859d701c1536b98266772eaff008ee3327..b83ba1b387ea237c4c860f6544253421fbb2250d 100644 (file)
@@ -58,5 +58,5 @@ set(mgr_modules
 install(DIRECTORY ${mgr_modules}
   DESTINATION ${CEPH_INSTALL_DATADIR}/mgr
   ${mgr_module_install_excludes})
-install(FILES mgr_module.py mgr_util.py object_format.py
+install(FILES mgr_module.py mgr_util.py object_format.py cherrypy_mgr.py
   DESTINATION ${CEPH_INSTALL_DATADIR}/mgr)
index cec4ab0ab81a50cad7d404d9cb77eeba97d85265..e95f368a909a0681af9cc4dccf1a480856fabf8b 100644 (file)
@@ -1,13 +1,5 @@
-try:
-    import cherrypy
-    from cherrypy._cpserver import Server
-except ImportError:
-    # to avoid sphinx build crash
-    class Server:  # type: ignore
-        pass
-
+import cherrypy
 import json
-import logging
 import socket
 import ssl
 import threading
@@ -27,24 +19,12 @@ from cephadm.services.cephadmservice import CephadmAgent
 from cephadm.tlsobject_types import TLSCredentials
 
 from urllib.error import HTTPError, URLError
-from typing import Any, Dict, List, Set, TYPE_CHECKING, Optional, MutableMapping, IO
+from typing import Any, Dict, List, Set, TYPE_CHECKING, Optional, MutableMapping, IO, Tuple
 
 if TYPE_CHECKING:
     from cephadm.module import CephadmOrchestrator
 
 
-def cherrypy_filter(record: logging.LogRecord) -> bool:
-    blocked = [
-        'TLSV1_ALERT_DECRYPT_ERROR'
-    ]
-    msg = record.getMessage()
-    return not any([m for m in blocked if m in msg])
-
-
-logging.getLogger('cherrypy.error').addFilter(cherrypy_filter)
-cherrypy.log.access_log.propagate = False
-
-
 CEPHADM_AGENT_CERT_DURATION = (365 * 5)
 
 
@@ -57,13 +37,21 @@ class AgentEndpoint:
         self.key_file: IO[bytes]
         self.cert_file: IO[bytes]
 
-    def configure_routes(self) -> None:
-        conf = {'/': {'tools.trailing_slash.on': False}}
-
-        cherrypy.tree.mount(self.host_data, '/data', config=conf)
-        cherrypy.tree.mount(self.node_proxy_endpoint, '/node-proxy', config=conf)
-
-    def configure_tls(self, server: Server) -> None:
+    def get_cherrypy_config(self) -> Dict:
+        config = {
+            '/': {
+                'tools.trailing_slash.on': False
+            }
+        }
+        return config
+
+    def configure_routes(self, config: Dict) -> List[tuple]:
+        return [
+            (self.host_data, '/data', config),
+            (self.node_proxy_endpoint, '/node-proxy', config),
+        ]
+
+    def configure_tls(self) -> Dict[str, str]:
         self.mgr.cert_mgr.register_self_signed_cert_key_pair(CephadmAgent.TYPE)
         tls_pair = self._get_agent_certificates()
         self.cert_file = tempfile.NamedTemporaryFile()
@@ -75,7 +63,10 @@ class AgentEndpoint:
         self.key_file.flush()  # pkey_tmp must not be gc'ed
 
         verify_tls_files(self.cert_file.name, self.key_file.name)
-        server.ssl_certificate, server.ssl_private_key = self.cert_file.name, self.key_file.name
+        return {
+            'cert': self.cert_file.name,
+            'key': self.key_file.name,
+        }
 
     def _get_agent_certificates(self) -> TLSCredentials:
         host = self.mgr.get_hostname()
@@ -90,19 +81,20 @@ class AgentEndpoint:
         while self.server_port <= max_port:
             try:
                 test_port_allocation(self.server_addr, self.server_port)
-                self.host_data.socket_port = self.server_port
                 self.mgr.log.debug(f'Cephadm agent endpoint using {self.server_port}')
                 return
             except PortAlreadyInUse:
                 self.server_port += 1
         self.mgr.log.error(f'Cephadm agent could not find free port in range {max_port - 150}-{max_port} and failed to start')
 
-    def configure(self) -> None:
-        self.host_data = HostData(self.mgr, self.server_port, self.server_addr)
-        self.configure_tls(self.host_data)
+    def configure(self) -> Tuple[Dict, Dict, List[tuple], tuple]:
+        self.host_data = HostData(self.mgr)
+        ssl_info = self.configure_tls()
         self.node_proxy_endpoint = NodeProxyEndpoint(self.mgr)
-        self.configure_routes()
+        config = self.get_cherrypy_config()
+        mount_specs = self.configure_routes(config)
         self.find_free_port()
+        return config, ssl_info, mount_specs, (self.server_addr, self.server_port)
 
 
 class NodeProxyEndpoint:
@@ -636,22 +628,11 @@ class NodeProxyEndpoint:
         return results
 
 
-class HostData(Server):
+class HostData:
     exposed = True
 
-    def __init__(self, mgr: "CephadmOrchestrator", port: int, host: str):
+    def __init__(self, mgr: "CephadmOrchestrator"):
         self.mgr = mgr
-        super().__init__()
-        self.socket_port = port
-        self.socket_host = host
-        self.subscribe()
-
-    def stop(self) -> None:
-        # we must call unsubscribe before stopping the server,
-        # otherwise the port is not released and we will get
-        # an exception when trying to restart it
-        self.unsubscribe()
-        super().stop()
 
     @cherrypy.tools.allow(methods=['POST'])
     @cherrypy.tools.json_in()
index baa00a3eb5ac833a6af788989e12c60a653ebb0a..ebc946c197edf9c452c00e3ea083d7bf5b9dcf79 100644 (file)
@@ -1,7 +1,10 @@
-import cherrypy
 import threading
-import logging
-from typing import TYPE_CHECKING
+import time
+import errno
+from typing import TYPE_CHECKING, Tuple, Any, Optional
+from cherrypy import _cptree
+from cherrypy.process.servers import ServerAdapter
+from cherrypy_mgr import CherryPyMgr
 
 from cephadm.agent import AgentEndpoint
 from cephadm.services.service_discovery import ServiceDiscovery
@@ -12,42 +15,20 @@ if TYPE_CHECKING:
     from cephadm.module import CephadmOrchestrator
 
 
-def cherrypy_filter(record: logging.LogRecord) -> bool:
-    blocked = [
-        'TLSV1_ALERT_DECRYPT_ERROR'
-    ]
-    msg = record.getMessage()
-    return not any([m for m in blocked if m in msg])
-
-
-logging.getLogger('cherrypy.error').addFilter(cherrypy_filter)
-cherrypy.log.access_log.propagate = False
-
-
 class CephadmHttpServer(threading.Thread):
     def __init__(self, mgr: "CephadmOrchestrator") -> None:
         self.mgr = mgr
         self.agent = AgentEndpoint(mgr)
         self.service_discovery = ServiceDiscovery(mgr)
         self.cherrypy_shutdown_event = threading.Event()
+        self.cherrypy_restart_event = threading.Event()
         self._service_discovery_port = self.mgr.service_discovery_port
         security_enabled, _, _ = self.mgr._get_security_config()
         self.security_enabled = security_enabled
+        self.agent_adapter = None
+        self.sd_adapter = None
         super().__init__(target=self.run)
 
-    def configure_cherrypy(self) -> None:
-        cherrypy.config.update({
-            'environment': 'production',
-            'engine.autoreload.on': False,
-        })
-
-    def configure(self) -> None:
-        self.configure_cherrypy()
-        self.agent.configure()
-        self.service_discovery.configure(self.mgr.service_discovery_port,
-                                         self.mgr.get_mgr_ip(),
-                                         self.security_enabled)
-
     def config_update(self) -> None:
         self.service_discovery_port = self.mgr.service_discovery_port
         security_enabled, _, _ = self.mgr._get_security_config()
@@ -76,27 +57,121 @@ class CephadmHttpServer(threading.Thread):
         self.restart()
 
     def restart(self) -> None:
-        cherrypy.engine.stop()
-        cherrypy.server.httpserver = None
-        self.configure()
-        cherrypy.engine.start()
+        self.cherrypy_restart_event.set()
+
+    def _stop_adapters(self) -> None:
+        adapters_to_stop = {
+            'service-discovery': getattr(self, 'sd_adapter', None),
+            'cephadm-agent': getattr(self, 'agent_adapter', None)
+        }
+        for name, adapter in adapters_to_stop.items():
+            if adapter:
+                try:
+                    adapter.stop()
+                    adapter.unsubscribe()
+                    CherryPyMgr.unregister(name)
+                except Exception as e:
+                    self.mgr.log.error(f'Failed to stop {name} adapter: {e}')
+
+        self.sd_adapter = None
+        self.agent_adapter = None
 
     def run(self) -> None:
+        def _mount_server(
+            name: str,
+            bind_addr: Tuple[str, int],
+            main_app: Any,
+            main_path: str,
+            main_config: dict,
+            ssl_info: Optional[dict] = None,
+            extra_mounts: Optional[Any] = None,
+            logger: Optional[Any] = None
+        ) -> ServerAdapter:
+            if logger:
+                logger.info(f'Starting {name} server on {bind_addr[0]}:{bind_addr[1]}...')
+
+            tree = _cptree.Tree()
+            tree.mount(main_app, main_path, config=main_config)
+
+            for app, path, conf in extra_mounts or []:
+                tree.mount(app, path, config=conf)
+
+            adapter, _ = CherryPyMgr.mount(
+                tree,
+                name,
+                bind_addr,
+                ssl_info=ssl_info
+            )
+            return adapter
+
+        def start_servers() -> None:
+            # start service discovery server
+            sd_port = self._service_discovery_port
+            sd_ip = self.mgr.get_mgr_ip()
+            sd_config, sd_ssl_info = self.service_discovery.configure(
+                sd_port,
+                sd_ip,
+                self.security_enabled
+            )
+            self.sd_adapter = _mount_server(
+                name='service-discovery',
+                bind_addr=(sd_ip, sd_port),
+                main_app=self.service_discovery,
+                main_path='/sd',
+                main_config=sd_config,
+                ssl_info=sd_ssl_info,
+                logger=self.mgr.log
+            )
+
+            # start agent server
+            agent_config, agent_ssl_info, agent_mounts, bind_addr = self.agent.configure()
+            self.agent_adapter = _mount_server(
+                name='cephadm-agent',
+                bind_addr=bind_addr,
+                main_app=self.agent,
+                main_path='/',
+                main_config=agent_config,
+                ssl_info=agent_ssl_info,
+                extra_mounts=agent_mounts,
+                logger=self.mgr.log
+            )
+
         try:
-            self.mgr.log.debug('Starting cherrypy engine...')
-            self.configure()
-            cherrypy.server.unsubscribe()  # disable default server
-            cherrypy.engine.start()
-            self.mgr.log.debug('Cherrypy engine started.')
-            self.mgr._kick_serve_loop()
-            # wait for the shutdown event
-            self.cherrypy_shutdown_event.wait()
-            self.cherrypy_shutdown_event.clear()
-            cherrypy.engine.stop()
-            cherrypy.server.httpserver = None
-            self.mgr.log.debug('Cherrypy engine stopped.')
+            start_servers()
+            self.mgr.log.info('Cherrypy server started successfully.')
         except Exception as e:
-            self.mgr.log.error(f'Failed to run cephadm http server: {e}')
+            self.mgr.log.error(f'Failed to start cherrypy server: {e}')
+            self._stop_adapters()
+            return
+
+        while not self.cherrypy_shutdown_event.is_set():
+            if self.cherrypy_restart_event.wait(timeout=0.5):
+                self.cherrypy_restart_event.clear()
+                self.mgr.log.debug('Restarting cherrypy server...')
+                self._stop_adapters()
+
+                retries = 10
+                for attempt in range(retries):
+                    try:
+                        start_servers()
+                        self.mgr.log.debug('Cherrypy server restarted successfully.')
+                        break
+                    except OSError as e:
+                        if e.errno == errno.EADDRINUSE:
+                            self.mgr.log.warning(f'Port already in use when restarting cherrypy server (attempt {attempt + 1}/{retries}): {e}')
+                            time.sleep(1)
+                        else:
+                            self.mgr.log.error(f'Failed to restart cherrypy server (attempt {attempt + 1}/{retries}): {e}')
+                            self._stop_adapters()
+                            break
+                    except Exception as e:
+                        self.mgr.log.error(f'Failed to restart cherrypy server (attempt {attempt + 1}/{retries}): {e}')
+                        self._stop_adapters()
+                        break
+                else:
+                    self.mgr.log.error('Exceeded maximum retries to restart cherrypy server. Please check the server status and resolve any port conflicts.')
+                    continue
+        self._stop_adapters()
 
     def shutdown(self) -> None:
         self.mgr.log.debug('Stopping cherrypy engine...')
index 2c0478cb6684e69d0d60d1c44fd9949829acfa5f..e874f90fe2ac95c6e68b33fe6fef5707c81e72de 100644 (file)
@@ -1,16 +1,9 @@
-try:
-    import cherrypy
-    from cherrypy._cpserver import Server
-except ImportError:
-    # to avoid sphinx build crash
-    class Server:  # type: ignore
-        pass
-
+import cherrypy
 import logging
 
 import orchestrator  # noqa
 from mgr_util import build_url
-from typing import Dict, List, TYPE_CHECKING, cast, Collection, Callable, NamedTuple, Optional, IO
+from typing import Dict, List, TYPE_CHECKING, cast, Collection, Callable, NamedTuple, Optional, IO, Tuple
 from cephadm.services.nfs import NFSService
 from cephadm.services.ingress import IngressService
 from cephadm.services.monitoring import AlertmanagerService, NodeExporterService, PrometheusService
@@ -28,16 +21,6 @@ if TYPE_CHECKING:
     from cephadm.module import CephadmOrchestrator
 
 
-def cherrypy_filter(record: logging.LogRecord) -> bool:
-    blocked = [
-        'TLSV1_ALERT_DECRYPT_ERROR'
-    ]
-    msg = record.getMessage()
-    return not any([m for m in blocked if m in msg])
-
-
-logging.getLogger('cherrypy.error').addFilter(cherrypy_filter)
-cherrypy.log.access_log.propagate = False
 logger = logging.getLogger(__name__)
 
 
@@ -62,27 +45,32 @@ class ServiceDiscovery:
     def validate_password(self, realm: str, username: str, password: str) -> bool:
         return (password == self.password and username == self.username)
 
-    def configure_routes(self, server: Server, enable_auth: bool) -> None:
+    def get_cherrypy_config(self, enable_auth: bool) -> Dict:
+        config = {
+            '/': {
+                'environment': 'production',
+                'tools.gzip.on': True,
+                'engine.autoreload.on': False,
+            }
+        }
+        if enable_auth:
+            config['/'].update({
+                'tools.auth_basic.on': True,
+                'tools.auth_basic.realm': 'localhost',
+                'tools.auth_basic.checkpassword': self.validate_password,
+            })
+        return config
+
+    def configure_routes(self, root: 'Root') -> cherrypy.dispatch.RoutesDispatcher:
         ROUTES = [
-            Route('index', '/', server.index),
-            Route('sd-config', '/prometheus/sd-config', server.get_sd_config),
-            Route('rules', '/prometheus/rules', server.get_prometheus_rules),
+            Route('index', '/', root.index),
+            Route('sd-config', '/prometheus/sd-config', root.get_sd_config),
+            Route('rules', '/prometheus/rules', root.get_prometheus_rules),
         ]
         d = cherrypy.dispatch.RoutesDispatcher()
         for route in ROUTES:
             d.connect(**route._asdict())
-        if enable_auth:
-            conf = {
-                '/': {
-                    'request.dispatch': d,
-                    'tools.auth_basic.on': True,
-                    'tools.auth_basic.realm': 'localhost',
-                    'tools.auth_basic.checkpassword': self.validate_password
-                }
-            }
-        else:
-            conf = {'/': {'request.dispatch': d}}
-        cherrypy.tree.mount(None, '/sd', config=conf)
+        return d
 
     def enable_auth(self) -> None:
         self.username = self.mgr.get_store('service_discovery/root/username')
@@ -93,7 +81,7 @@ class ServiceDiscovery:
             self.mgr.set_store('service_discovery/root/password', self.password)
             self.mgr.set_store('service_discovery/root/username', self.username)
 
-    def configure_tls(self, server: Server) -> None:
+    def configure_tls(self) -> Dict[str, str]:
         addr = self.mgr.get_mgr_ip()
         host = self.mgr.get_hostname()
         tls_pair = self.mgr.cert_mgr.generate_cert(host, addr, duration_in_days=CEPHADM_SVC_DISCOVERY_CERT_DURATION)
@@ -106,40 +94,33 @@ class ServiceDiscovery:
         self.key_file.flush()  # pkey_tmp must not be gc'ed
 
         verify_tls_files(self.cert_file.name, self.key_file.name)
+        return {
+            'cert': self.cert_file.name,
+            'key': self.key_file.name,
+        }
 
-        server.ssl_certificate, server.ssl_private_key = self.cert_file.name, self.key_file.name
-
-    def configure(self, port: int, addr: str, enable_security: bool) -> None:
+    def configure(self, port: int, addr: str, enable_security: bool) -> Tuple[Dict, Optional[Dict[str, str]]]:
         # we create a new server to enforce TLS/SSL config refresh
-        self.root_server = Root(self.mgr, port, addr)
-        self.root_server.ssl_certificate = None
-        self.root_server.ssl_private_key = None
+        self.root_server = Root(self.mgr)
+        ssl_info = None
         if enable_security:
             self.enable_auth()
-            self.configure_tls(self.root_server)
-        self.configure_routes(self.root_server, enable_security)
+            ssl_info = self.configure_tls()
+        config = self.get_cherrypy_config(enable_security)
+        dispatcher = self.configure_routes(self.root_server)
+        config['/'].update({'request.dispatch': dispatcher})
+        return config, ssl_info
 
 
-class Root(Server):
+class Root:
 
     # collapse everything to '/'
     def _cp_dispatch(self, vpath: str) -> 'Root':
         cherrypy.request.path = ''
         return self
 
-    def stop(self) -> None:
-        # we must call unsubscribe before stopping the server,
-        # otherwise the port is not released and we will get
-        # an exception when trying to restart it
-        self.unsubscribe()
-        super().stop()
-
-    def __init__(self, mgr: "CephadmOrchestrator", port: int = 0, host: str = ''):
+    def __init__(self, mgr: "CephadmOrchestrator"):
         self.mgr = mgr
-        super().__init__()
-        self.socket_port = port
-        self.socket_host = host
-        self.subscribe()
 
     @cherrypy.expose
     def index(self) -> str:
index b560f81ce3192a95b774ac1a90d577c9ee370e66..ab86a6829dc5890db1968e43975613167203186d 100644 (file)
@@ -143,7 +143,7 @@ class TestServiceDiscovery:
 
     def test_get_sd_config_prometheus(self):
         mgr = FakeMgr()
-        root = Root(mgr, 5000, '0.0.0.0')
+        root = Root(mgr)
         cfg = root.get_sd_config('mgr-prometheus')
 
         # check response structure
@@ -157,7 +157,7 @@ class TestServiceDiscovery:
 
     def test_get_sd_config_node_exporter(self):
         mgr = FakeMgr()
-        root = Root(mgr, 5000, '0.0.0.0')
+        root = Root(mgr)
         cfg = root.get_sd_config('node-exporter')
 
         # check response structure
@@ -174,7 +174,7 @@ class TestServiceDiscovery:
 
     def test_get_sd_config_alertmgr(self):
         mgr = FakeMgr()
-        root = Root(mgr, 5000, '0.0.0.0')
+        root = Root(mgr)
         cfg = root.get_sd_config('alertmanager')
 
         # check response structure
@@ -188,7 +188,7 @@ class TestServiceDiscovery:
 
     def test_get_sd_config_haproxy(self):
         mgr = FakeMgr()
-        root = Root(mgr, 5000, '0.0.0.0')
+        root = Root(mgr)
         cfg = root.get_sd_config('haproxy')
 
         # check response structure
@@ -204,7 +204,7 @@ class TestServiceDiscovery:
 
     def test_get_sd_config_ceph_exporter(self):
         mgr = FakeMgr()
-        root = Root(mgr, 5000, '0.0.0.0')
+        root = Root(mgr)
         cfg = root.get_sd_config('ceph-exporter')
 
         # check response structure
@@ -218,7 +218,7 @@ class TestServiceDiscovery:
 
     def test_get_sd_config_nvmeof(self):
         mgr = FakeMgr()
-        root = Root(mgr, 5000, '0.0.0.0')
+        root = Root(mgr)
         cfg = root.get_sd_config('nvmeof')
 
         # check response structure
@@ -232,7 +232,7 @@ class TestServiceDiscovery:
 
     def test_get_sd_config_nfs(self):
         mgr = FakeMgr()
-        root = Root(mgr, 5000, '0.0.0.0')
+        root = Root(mgr)
         cfg = root.get_sd_config('nfs')
 
         # check response structure
@@ -246,7 +246,7 @@ class TestServiceDiscovery:
 
     def test_get_sd_config_smb(self):
         mgr = FakeMgr()
-        root = Root(mgr, 5000, '0.0.0.0')
+        root = Root(mgr)
         cfg = root.get_sd_config('smb')
 
         # check response structure
@@ -260,7 +260,7 @@ class TestServiceDiscovery:
 
     def test_get_sd_config_custom_container(self):
         mgr = FakeMgr()
-        root = Root(mgr, 5000, '0.0.0.0')
+        root = Root(mgr)
         cfg = root.get_sd_config('container.custom-container')
 
         # check response structure
@@ -274,6 +274,6 @@ class TestServiceDiscovery:
 
     def test_get_sd_config_invalid_service(self):
         mgr = FakeMgr()
-        root = Root(mgr, 5000, '0.0.0.0')
+        root = Root(mgr)
         cfg = root.get_sd_config('invalid-service')
         assert cfg == []
diff --git a/src/pybind/mgr/cherrypy_mgr.py b/src/pybind/mgr/cherrypy_mgr.py
new file mode 100644 (file)
index 0000000..b401a3e
--- /dev/null
@@ -0,0 +1,165 @@
+"""
+CherryPyMgr is a utility class to encapsulate the CherryPy server instance
+into a standalone component. Unlike standard cherrypy which relies on global state
+and a single engine, CherryPyMgr allows for multiple independent server instances
+to be created and managed within the same process. So we can run multiple servers
+in each module without worrying about their global state interfering with each other.
+
+Usage:
+    # Create a tree and mount your WSGI app on it
+    from cherrypy import _cptree
+    tree = _cptree.Tree()
+    tree.mount(my_wsgi_app, config=config)
+
+    # Mount your WSGI app on the manager
+    adapter, app = CherryPyMgr.mount(
+        tree,
+        'my-app',
+        addr,
+        ssl_info={'cert': 'path/to/cert.pem', 'key': 'path/to/key.pem', 'context': ssl_context}
+        )
+
+    # The adapter can be used to stop the server when needed
+    adapter.stop()
+
+Each mounted app is stored in the class variable _trees, which allows us to retrieve
+the server config for each app when needed. This will let us dynamically update the
+server configuration for each app without affecting the others.
+
+Usage:
+    config = CherryPyMgr.get_server_config(name='my-app', mount_point='/')
+    if config:
+        # Do something with the config
+"""
+import logging
+import cherrypy
+from cherrypy.process.servers import ServerAdapter
+from cheroot.wsgi import Server as WSGIServer
+from cheroot.ssl.builtin import BuiltinSSLAdapter
+from cherrypy._cptree import Tree
+from typing import Any, Tuple, Optional, Dict
+
+logger = logging.getLogger(__name__)
+
+
+class CherryPyErrorFilter(logging.Filter):
+    """
+    Filters out specific, noisy CherryPy connection errors
+    that do not indicate a service failure.
+    """
+    def filter(self, record: logging.LogRecord) -> bool:
+        blocked = [
+            'TLSV1_ALERT_DECRYPT_ERROR'
+        ]
+        msg = record.getMessage()
+        return not any(m in msg for m in blocked)
+
+
+class CherryPyMgr:
+    _trees: Dict[str, Tree] = {}
+
+    @classmethod
+    def mount(
+        cls,
+        tree: Tree,
+        name: str,
+        bind_addr: Tuple[str, int],
+        ssl_info: Optional[Dict[str, Any]] = None
+    ) -> Tuple[ServerAdapter, Any]:
+        """
+        :param bind_addr: Tuple (host, port)
+        :param ssl_info: Dict containing {'cert': path, 'key': path, 'context': ssl_context}
+        """
+        cls._trees[name] = tree
+
+        is_engine_running = cherrypy.engine.state in (
+            cherrypy.engine.states.STARTED,
+            cherrypy.engine.states.STARTING
+        )
+
+        if not is_engine_running:
+            if hasattr(cherrypy, 'server'):
+                cherrypy.server.unsubscribe()
+            if hasattr(cherrypy.engine, 'autoreload'):
+                cherrypy.engine.autoreload.unsubscribe()
+            if hasattr(cherrypy.engine, 'signal_handler'):
+                cherrypy.engine.signal_handler.unsubscribe()
+
+            cherrypy.config.update({
+                'engine.autoreload.on': False,
+                'checker.on': False,
+                'tools.log_headers.on': False,
+                'log.screen': False
+            })
+            try:
+                cherrypy.engine.start()
+                logger.info('Cherrypy engine started successfully.')
+            except Exception as e:
+                logger.error(f'Failed to start cherrypy engine: {e}')
+                raise
+
+        cls.configure_logging()
+        adapter = cls.create_adapter(tree, bind_addr, ssl_info)
+        cls.subscribe_adapter(adapter)
+        adapter.start()
+
+        return adapter, tree
+
+    @classmethod
+    def get_server_config(
+        cls,
+        name: str,
+        mount_point: str = '/'
+    ) -> Optional[Dict]:
+        if name in cls._trees:
+            tree = cls._trees[name]
+            if mount_point in tree.apps:
+                return tree.apps[mount_point].config
+            if mount_point == '/' and '' in tree.apps:
+                return tree.apps[''].config
+            stripped = mount_point.rstrip('/')
+            if stripped in tree.apps:
+                return tree.apps[stripped].config
+        return None
+
+    @classmethod
+    def unregister(cls, name: str) -> None:
+        cls._trees.pop(name, None)
+
+    @staticmethod
+    def configure_logging() -> None:
+        cherrypy.log.access_log.propagate = False
+        cherrypy.log.error_log.propagate = False
+
+        error_log = logging.getLogger('cherrypy.error')
+
+        # make sure we only add the filter once
+        has_filter = any(isinstance(f, CherryPyErrorFilter) for f in error_log.filters)
+        if not has_filter:
+            error_log.addFilter(CherryPyErrorFilter())
+
+    @staticmethod
+    def create_adapter(
+        app: Any,
+        bind_addr: Tuple[str, int],
+        ssl_info: Optional[Dict[str, Any]] = None,
+    ) -> ServerAdapter:
+        server = WSGIServer(
+            bind_addr=bind_addr,
+            wsgi_app=app,
+            numthreads=30,
+            server_name='Ceph-Mgr'
+        )
+
+        if ssl_info:
+            ssl_adapter = BuiltinSSLAdapter(ssl_info['cert'], ssl_info['key'])
+            if ssl_info.get('context'):
+                ssl_adapter.context = ssl_info['context']
+            server.ssl_adapter = ssl_adapter
+
+        adapter = ServerAdapter(cherrypy.engine, server, bind_addr)
+        return adapter
+
+    @staticmethod
+    def subscribe_adapter(adapter: ServerAdapter) -> None:
+        adapter.subscribe()
index 713deca65e8028ec7b3aa88786c66ccbe3dc909a..090a6881e2c7ae0639e99328c29cd9d27af3ddcb 100644 (file)
@@ -15,6 +15,8 @@ import time
 from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple
 from urllib.parse import urlparse
 
+from cherrypy import _cptree
+
 from .controllers.multi_cluster import MultiCluster
 
 if TYPE_CHECKING:
@@ -24,6 +26,7 @@ if TYPE_CHECKING:
         from typing_extensions import Literal
 
 from ceph.cryptotools.select import choose_crypto_caller
+from cherrypy_mgr import CherryPyMgr
 from mgr_module import HandleCommandResult, MgrModule, MgrStandbyModule, \
     NotifyType, Option, _get_localized_key
 from mgr_util import ServerConfigException, build_url, \
@@ -83,6 +86,7 @@ class CherryPyConfig(object):
 
         self.cert_tmp = None
         self.pkey_tmp = None
+        self.app_name = 'ceph-dashboard'
 
     def shutdown(self):
         self._stopping.set()
@@ -91,10 +95,42 @@ class CherryPyConfig(object):
     def url_prefix(self):
         return self._url_prefix
 
-    @staticmethod
-    def update_cherrypy_config(config):
-        PLUGIN_MANAGER.hook.configure_cherrypy(config=config)
-        cherrypy.config.update(config)
+    def update_cherrypy_config(self, config):
+        """
+        Prepare the dashboard's CherryPy configuration in place.
+
+        The '/' section of ``config`` (created if missing) is filled with the
+        dashboard defaults without overriding keys that are already set, and
+        is then passed to the plugins' ``configure_cherrypy`` hook. If
+        CherryPyMgr already knows a config for this app's '/' mount point, the
+        hook is also run against that config's '/' section.
+
+        :param config: dict keyed by path whose values are CherryPy settings
+            dicts; it is modified in place
+        """
+        defaults = {
+            'response.headers.server': 'Ceph-Dashboard',
+            'response.headers.content-security-policy': "frame-ancestors 'self';",
+            'response.headers.x-content-type-options': 'nosniff',
+            'response.headers.strict-transport-security': 'max-age=63072000; includeSubDomains; preload',  # noqa
+            'engine.autoreload.on': False,
+            'tools.request_logging.on': True,
+            'tools.gzip.on': True,
+            'tools.gzip.mime_types': [
+                'text/html', 'text/plain', 'application/json',
+                'application/*+json', 'application/javascript', 'text/css'
+            ],
+            'tools.json_in.on': True,
+            'tools.json_in.force': True,
+            'tools.plugin_hooks_filter_request.on': True,
+            'error_page.default': json_error_page,
+            'tools.sessions.on': True
+        }
+
+        def _apply_config(target_conf, is_startup=False):
+            """Seed ``target_conf`` with defaults (startup only), then run the plugin hook."""
+            if is_startup:
+                # setdefault keeps any value the caller has already chosen
+                for key, value in defaults.items():
+                    target_conf.setdefault(key, value)
+
+            PLUGIN_MANAGER.hook.configure_cherrypy(config=target_conf)
+
+        if '/' not in config:
+            config['/'] = {}
+        _apply_config(config['/'], is_startup=True)
+
+        # get_server_config() yields nothing until the app has been registered
+        # with CherryPyMgr; in that case only the startup config is touched
+        app_config = CherryPyMgr.get_server_config(
+            name=self.app_name, mount_point='/'
+        )
+        if app_config and '/' in app_config:
+            _apply_config(app_config['/'])
 
     # pylint: disable=too-many-branches
     def _configure(self):
@@ -120,8 +156,9 @@ class CherryPyConfig(object):
                       server_addr, server_port)
 
         # Initialize custom handlers.
+        config: Dict[str, Dict[str, Any]] = {'/': {}}
+
         cherrypy.tools.authenticate = AuthManagerTool()
-        configure_cors()
         cherrypy.tools.plugin_hooks_filter_request = cherrypy.Tool(
             'before_handler',
             lambda: PLUGIN_MANAGER.hook.filter_request_before_handler(request=cherrypy.request),
@@ -130,31 +167,7 @@ class CherryPyConfig(object):
         cherrypy.tools.dashboard_exception_handler = HandlerWrapperTool(dashboard_exception_handler,
                                                                         priority=31)
 
-        cherrypy.log.access_log.propagate = False
-        cherrypy.log.error_log.propagate = False
-
-        # Apply the 'global' CherryPy configuration.
-        config = {
-            'engine.autoreload.on': False,
-            'server.socket_host': server_addr,
-            'server.socket_port': int(server_port),
-            'error_page.default': json_error_page,
-            'tools.request_logging.on': True,
-            'tools.gzip.on': True,
-            'tools.gzip.mime_types': [
-                # text/html and text/plain are the default types to compress
-                'text/html', 'text/plain',
-                # We also want JSON and JavaScript to be compressed
-                'application/json',
-                'application/*+json',
-                'application/javascript',
-                'text/css',
-            ],
-            'tools.json_in.on': True,
-            'tools.json_in.force': True,
-            'tools.plugin_hooks_filter_request.on': True,
-        }
-
+        ssl_info = None
         if use_ssl:
             # SSL initialization
             cert = self.get_localized_store("crt")  # type: ignore
@@ -185,10 +198,11 @@ class CherryPyConfig(object):
             else:
                 context.options |= ssl.OP_NO_TLSv1 | ssl.OP_NO_TLSv1_1 | ssl.OP_NO_TLSv1_2
 
-            config['server.ssl_module'] = 'builtin'
-            config['server.ssl_certificate'] = cert_fname
-            config['server.ssl_private_key'] = pkey_fname
-            config['server.ssl_context'] = context
+            ssl_info = {
+                'cert': cert_fname,
+                'key': pkey_fname,
+                'context': context
+            }
 
         self.update_cherrypy_config(config)
 
@@ -203,7 +217,7 @@ class CherryPyConfig(object):
             port=server_port,
         )
         uri = f'{base_url}{self.url_prefix}/'
-        return uri
+        return uri, (server_addr, server_port), ssl_info, config
 
     def await_configuration(self):
         """
@@ -214,7 +228,7 @@ class CherryPyConfig(object):
         """
         while not self._stopping.is_set():
             try:
-                uri = self._configure()
+                uri, bind_addr, ssl_info, config = self._configure()
             except ServerConfigException as e:
                 self.log.info(  # type: ignore
                     "Config not ready to serve, waiting: {0}".format(e)
@@ -223,7 +237,7 @@ class CherryPyConfig(object):
                 self._stopping.wait(5)
             else:
                 self.log.info("Configured CherryPy, starting engine...")  # type: ignore
-                return uri
+                return uri, bind_addr, ssl_info, config
 
 
 if TYPE_CHECKING:
@@ -293,6 +307,8 @@ class Module(MgrModule, CherryPyConfig):
     def __init__(self, *args, **kwargs):
         super(Module, self).__init__(*args, **kwargs)
         CherryPyConfig.__init__(self)
+        self.server_adapter = None
+
         # configure the dashboard's crypto caller. by default it will
         # use the remote caller to avoid pyo3 conflicts
         choose_crypto_caller(str(self.get_module_option('crypto_caller', '')))
@@ -343,10 +359,10 @@ class Module(MgrModule, CherryPyConfig):
         AuthManager.initialize()
         load_sso_db()
 
-        uri = self.await_configuration()
-        if uri is None:
-            # We were shut down while waiting
+        conf_result = self.await_configuration()
+        if conf_result is None:
             return
+        uri, bind_addr, ssl_info, config = conf_result
 
         # Publish the URI that others may use to access the service we're
         # about to start serving
@@ -354,17 +370,27 @@ class Module(MgrModule, CherryPyConfig):
 
         mapper, parent_urls = Router.generate_routes(self.url_prefix)
 
-        config = {}
+        self.update_cherrypy_config(config)
+        configure_cors(startup_config=config)
         for purl in parent_urls:
-            config[purl] = {
-                'request.dispatch': mapper
-            }
-
-        cherrypy.tree.mount(None, config=config)
+            # Ensure the key exists
+            if purl not in config:
+                config[purl] = {}
+            config[purl]['request.dispatch'] = mapper
+
+        logger.info('Starting ceph dashboard server at %s', uri)
+
+        tree = _cptree.Tree()
+        tree.mount(None, '/', config=config)
+        self.server_adapter, _ = CherryPyMgr.mount(
+            tree,
+            self.app_name,
+            bind_addr,
+            ssl_info=ssl_info
+        )
 
         PLUGIN_MANAGER.hook.setup()
 
-        cherrypy.engine.start()
         NotificationQueue.start_queue()
         TaskManager.init()
         logger.info('Engine started.')
@@ -381,8 +407,8 @@ class Module(MgrModule, CherryPyConfig):
         # wait for the shutdown event
         self.shutdown_event.wait()
         self.shutdown_event.clear()
+        self.stop_adapter()
         NotificationQueue.stop()
-        cherrypy.engine.stop()
         logger.info('Engine stopped')
 
     def shutdown(self):
@@ -391,6 +417,13 @@ class Module(MgrModule, CherryPyConfig):
         logger.info('Stopping engine...')
         self.shutdown_event.set()
 
+    def stop_adapter(self):
+        """
+        Shut down the dashboard's server adapter, if one was started.
+
+        The adapter is stopped and unsubscribed, the reference is reset to
+        None and the app name is unregistered from CherryPyMgr. Without an
+        adapter this does nothing.
+        """
+        adapter = self.server_adapter
+        if adapter is None:
+            return
+        adapter.stop()
+        adapter.unsubscribe()
+        self.server_adapter = None
+        CherryPyMgr.unregister(self.app_name)
+
     def _set_ssl_item(self, item_label: str, item_key: 'SslConfigKey' = 'crt',
                       mgr_id: Optional[str] = None, inbuf: Optional[str] = None):
         if inbuf is None:
@@ -493,8 +526,11 @@ class Module(MgrModule, CherryPyConfig):
     @DBCLICommand.Write("dashboard set-cross-origin-url")
     def set_cross_origin_url(self, value: str):
         cross_origin_urls = self.get_module_option('cross_origin_url', '')
-        cross_origin_urls_list = [url.strip()
-                                  for url in cross_origin_urls.split(',')]  # type: ignore
+        cross_origin_urls_list = [
+            url.strip()
+            for url in cross_origin_urls.split(',')  # type: ignore
+            if url.strip()
+        ]
         urls = [v.strip() for v in value.split(',')]
         for url in urls:
             if url in cross_origin_urls_list:
@@ -571,6 +607,7 @@ class StandbyModule(MgrStandbyModule, CherryPyConfig):
         super(StandbyModule, self).__init__(*args, **kwargs)
         CherryPyConfig.__init__(self)
         self.shutdown_event = threading.Event()
+        self.standby_adapter = None
         # configure the dashboard's crypto caller. by default it will
         # use the remote caller to avoid pyo3 conflicts
         choose_crypto_caller(str(self.get_module_option('crypto_caller', '')))
@@ -580,10 +617,10 @@ class StandbyModule(MgrStandbyModule, CherryPyConfig):
         mgr.init(self)
 
     def serve(self):
-        uri = self.await_configuration()
-        if uri is None:
-            # We were shut down while waiting
+        conf_result = self.await_configuration()
+        if conf_result is None:
             return
+        uri, bind_addr, ssl_info, config = conf_result
 
         module = self
 
@@ -631,19 +668,32 @@ class StandbyModule(MgrStandbyModule, CherryPyConfig):
                     status = module.get_module_option('standby_error_status_code', 500)
                     raise cherrypy.HTTPError(status, message="Keep on looking")
 
-        cherrypy.tree.mount(Root(), "{}/".format(self.url_prefix), {})
+        self.update_cherrypy_config(config)
+
+        standby_tree = _cptree.Tree()
+        standby_tree.mount(Root(), f"{self.url_prefix}/", config=config)
         self.log.info("Starting engine...")
-        cherrypy.engine.start()
+        self.standby_adapter, _ = CherryPyMgr.mount(
+            standby_tree,
+            'ceph-dashboard-standby',
+            bind_addr,
+            ssl_info=ssl_info
+        )
         self.log.info("Engine started...")
         # Wait for shutdown event
         self.shutdown_event.wait()
         self.shutdown_event.clear()
-        cherrypy.engine.stop()
+        self.stop_adapter()
         self.log.info("Engine stopped.")
 
     def shutdown(self):
         CherryPyConfig.shutdown(self)
-
         self.log.info("Stopping engine...")
         self.shutdown_event.set()
-        self.log.info("Stopped engine...")
+
+    def stop_adapter(self):
+        """
+        Shut down the standby server adapter, if one was started.
+
+        The adapter is stopped and unsubscribed, the reference is reset to
+        None and 'ceph-dashboard-standby' is unregistered from CherryPyMgr.
+        Without an adapter this does nothing.
+        """
+        adapter = self.standby_adapter
+        if adapter is None:
+            return
+        adapter.stop()
+        adapter.unsubscribe()
+        self.standby_adapter = None
+        CherryPyMgr.unregister('ceph-dashboard-standby')
index 7f1cdb5887c390408349b917a4d8e7e2677c2d14..b3f966ecffe4097a8af47008c39db50ff076d878 100644 (file)
@@ -22,13 +22,6 @@ from ..access_control import LocalAuthenticator, UserDoesNotExist
 if TYPE_CHECKING:
     from dashboard.services.sso import SsoDB
 
-cherrypy.config.update({
-    'response.headers.server': 'Ceph-Dashboard',
-    'response.headers.content-security-policy': "frame-ancestors 'self';",
-    'response.headers.x-content-type-options': 'nosniff',
-    'response.headers.strict-transport-security': 'max-age=63072000; includeSubDomains; preload'
-})
-
 
 class AuthType(str, Enum):
     LOCAL = 'local'
index fbf1e26bf41e968bdbfd7d683c6fadf8e7199d43..2fd24119b7f4428d4e4cf71a1cd533edc4a8bbbb 100644 (file)
@@ -12,6 +12,7 @@ from datetime import datetime, timedelta, timezone
 
 import cherrypy
 from ceph.utils import strtobool
+from cherrypy_mgr import CherryPyMgr
 from mgr_util import build_url
 
 from . import mgr
@@ -840,7 +841,7 @@ def merge_list_of_dicts_by_key(target_list: list, source_list: list, key: str):
     return target_list
 
 
-def configure_cors(url: str = ''):
+def configure_cors(url: str = '', startup_config: Optional[Dict] = None):
     """
     Allow CORS requests if the cross_origin_url option is set.
     """
@@ -850,11 +851,23 @@ def configure_cors(url: str = ''):
     else:
         cross_origin_url = mgr.get_localized_module_option('cross_origin_url', '')
     if cross_origin_url:
-        cherrypy.tools.CORS = cherrypy.Tool('before_handler', cors_tool)
-        config = {
-            'tools.CORS.on': True,
-        }
-        cherrypy.config.update(config)
+        if not hasattr(cherrypy.tools, 'CORS'):
+            cherrypy.tools.CORS = cherrypy.Tool('before_handler', cors_tool)
+
+        def _apply_cors(target_config):
+            if target_config is not None:
+                if '/' not in target_config or target_config['/'] is None:
+                    target_config['/'] = {}
+                target_config['/']['tools.CORS.on'] = True
+
+        _apply_cors(startup_config)
+
+        url_prefix = prepare_url_prefix(mgr.get_module_option('url_prefix', default=''))
+        config = CherryPyMgr.get_server_config(
+            name='ceph-dashboard',
+            mount_point=url_prefix
+        )
+        _apply_cors(config)
 
 
 def cors_tool():
index 26b0da41d6d078c8cb390f048845f61647bff9a2..17c75532e5c49a02831550fc4ccabf9ec0455e17 100644 (file)
@@ -6,10 +6,13 @@ import math
 import re
 import threading
 import time
+import errno
 import enum
 from collections import namedtuple
 from collections import OrderedDict
 from tempfile import NamedTemporaryFile
+from cherrypy_mgr import CherryPyMgr
+from cherrypy import _cptree
 
 from .cli import PrometheusCLICommand
 
@@ -66,11 +69,6 @@ def _wait_for_port_available(
     return False
 
 
-cherrypy.config.update({
-    'response.headers.server': 'Ceph-Prometheus'
-})
-
-
 def health_status_to_number(status: str) -> int:
     if status == 'HEALTH_OK':
         return 0
@@ -1962,7 +1960,7 @@ class Module(MgrModule, OrchestratorClientMixin):
         self.collect()
         self.get_file_sd_config()
 
-    def configure(self, server_addr: str, server_port: int) -> None:
+    def configure(self) -> Tuple[Dict[str, Dict[str, Any]], Optional[Dict[str, str]], str]:
         cmd = {'prefix': 'orch get-security-config'}
         ret, out, _ = self.mon_command(cmd)
 
@@ -1970,8 +1968,7 @@ class Module(MgrModule, OrchestratorClientMixin):
             try:
                 security_config = json.loads(out)
                 if security_config.get('security_enabled', False):
-                    self.setup_tls_config(server_addr, server_port)
-                    return
+                    return self.setup_tls_config()
             except Exception as e:
                 self.log.exception(
                     'Failed to setup cephadm based secure monitoring stack: %s\n'
@@ -1980,29 +1977,27 @@ class Module(MgrModule, OrchestratorClientMixin):
                 )
 
         # In any error fallback to plain http mode
-        self.setup_default_config(server_addr, server_port)
-
-    def setup_default_config(self, server_addr: str, server_port: int) -> None:
-        cherrypy.config.update({
-            'server.socket_host': server_addr,
-            'server.socket_port': server_port,
-            'engine.autoreload.on': False,
-            'server.ssl_module': None,
-            'server.ssl_certificate': None,
-            'server.ssl_private_key': None,
-            'tools.gzip.on': True,
-            'tools.gzip.mime_types': [
-                'text/plain',
-                'text/html',
-                'application/json',
-            ],
-            'tools.gzip.compress_level': 6,
-        })
-        # Publish the URI that others may use to access the service we're about to start serving
-        self.set_uri(build_url(scheme='http', host=self.get_server_addr(),
-                     port=server_port, path='/'))
+        return self.setup_default_config()
+
+    def get_cherrypy_config(self) -> Dict[str, Dict[str, Any]]:
+        """
+        Return a new CherryPy config dict whose '/' section sets the
+        'Ceph-Prometheus' server header and the gzip tool options.
+        """
+        gzip_mime_types = ['text/plain', 'text/html', 'application/json']
+        root_section: Dict[str, Any] = {
+            'response.headers.server': 'Ceph-Prometheus',
+            'tools.gzip.on': True,
+            'tools.gzip.mime_types': gzip_mime_types,
+            'tools.gzip.compress_level': 6,
+        }
+        return {'/': root_section}
+
+    def setup_default_config(self) -> Tuple[Dict[str, Dict[str, Any]], None, str]:
+        """Return the plain-HTTP setup: (cherrypy config, no ssl_info, 'http')."""
+        return self.get_cherrypy_config(), None, 'http'
 
-    def setup_tls_config(self, server_addr: str, server_port: int) -> None:
+    def setup_tls_config(self) -> Tuple[Dict[str, Dict[str, Any]], Optional[Dict[str, Any]], str]:
         # Temporarily disabling the verify function due to issues.
         # Please check verify_tls_files below to more information.
         # from mgr_util import verify_tls_files
@@ -2012,10 +2007,10 @@ class Module(MgrModule, OrchestratorClientMixin):
         ret, out, err = self.mon_command(cmd)
         if ret != 0:
             self.log.error(f'mon command to generate-certificates failed: {err}')
-            return
-        elif out is None:
+            return self.setup_default_config()
+        elif not out or not out.strip():
             self.log.error('mon command to generate-certificates failed to generate certificates')
-            return
+            return self.setup_default_config()
 
         cert_key = json.loads(out)
         self.cert_file = NamedTemporaryFile()
@@ -2030,25 +2025,12 @@ class Module(MgrModule, OrchestratorClientMixin):
         # Re-enable once the issue is resolved.
         # verify_tls_files(self.cert_file.name, self.key_file.name)
         cert_file_path, key_file_path = self.cert_file.name, self.key_file.name
+        ssl_info = {
+            'cert': cert_file_path,
+            'key': key_file_path
+        }
 
-        cherrypy.config.update({
-            'server.socket_host': server_addr,
-            'server.socket_port': server_port,
-            'engine.autoreload.on': False,
-            'server.ssl_module': 'builtin',
-            'server.ssl_certificate': cert_file_path,
-            'server.ssl_private_key': key_file_path,
-            'tools.gzip.on': True,
-            'tools.gzip.mime_types': [
-                'text/plain',
-                'text/html',
-                'application/json',
-            ],
-            'tools.gzip.compress_level': 6,
-        })
-        # Publish the URI that others may use to access the service we're about to start serving
-        self.set_uri(build_url(scheme='https', host=self.get_server_addr(),
-                     port=server_port, path='/'))
+        return self.get_cherrypy_config(), ssl_info, 'https'
 
     def serve(self) -> None:
 
@@ -2128,13 +2110,6 @@ class Module(MgrModule, OrchestratorClientMixin):
                                              self.STALE_CACHE_RETURN]:
             self.stale_cache_strategy = self.STALE_CACHE_FAIL
 
-        server_addr = cast(str, self.get_localized_module_option('server_addr', get_default_addr()))
-        server_port = cast(int, self.get_localized_module_option('server_port', DEFAULT_PORT))
-        self.log.info(
-            "server_addr: %s server_port: %s" %
-            (server_addr, server_port)
-        )
-
         self.cache = cast(bool, self.get_localized_module_option('cache', True))
         if self.cache:
             self.log.info('Cache enabled')
@@ -2142,21 +2117,33 @@ class Module(MgrModule, OrchestratorClientMixin):
         else:
             self.log.info('Cache disabled')
 
-        self.configure(server_addr, server_port)
+        def start_server() -> cherrypy.process.servers.ServerAdapter:
+            server_addr = cast(str, self.get_localized_module_option('server_addr', get_default_addr()))
+            server_port = cast(int, self.get_localized_module_option('server_port', DEFAULT_PORT))
 
-        cherrypy.tree.mount(Root(), "/")
+            config, ssl_info, scheme = self.configure()
+            tree = _cptree.Tree()
+            tree.mount(Root(), "/", config=config)
+
+            # Wait for port to be available before starting (handles standby->active transition)
+            if not _wait_for_port_available(self.log, server_addr, server_port):
+                self.log.warning(f'Port {server_port} still in use after waiting, attempting to start anyway')
+
+            self.log.info(f'Starting prometheus server on {server_addr}:{server_port}')
+            adapter, _ = CherryPyMgr.mount(
+                tree,
+                'prometheus',
+                (server_addr, int(server_port)),
+                ssl_info=ssl_info
+            )
+            self.set_uri(build_url(scheme=scheme, host=self.get_server_addr(), port=server_port, path='/'))
+            return adapter
 
-        # Wait for port to be available before starting (handles standby->active transition)
-        if not _wait_for_port_available(self.log, server_addr, server_port):
-            self.log.warning(f'Port {server_port} still in use after waiting, attempting to start anyway')
-        self.log.info('Starting engine...')
         try:
-            cherrypy.engine.start()
+            self.server_adapter = start_server()
         except Exception as e:
-            self.log.error(f'Failed to start engine: {e}')
+            self.log.error(f'Failed to start Prometheus: {e}')
             return
-        self.log.info('Engine started.')
-
         # Main event loop: handle both shutdown and config change events
         while True:
             # Wait for either shutdown or config change event (check every 0.5s)
@@ -2171,34 +2158,35 @@ class Module(MgrModule, OrchestratorClientMixin):
                 # Config changed, restart engine with new configuration
                 self.config_change_event.clear()
                 self.log.info('Restarting engine due to config change...')
-
-                # https://stackoverflow.com/questions/7254845/change-cherrypy-port-and-restart-web-server
-                # if we omit the line: cherrypy.server.httpserver = None
-                # then the cherrypy server is not restarted correctly
-                cherrypy.engine.stop()
-                cherrypy.server.httpserver = None
-
-                # Re-read configuration
-                server_addr = cast(str, self.get_localized_module_option('server_addr', get_default_addr()))
-                server_port = cast(int, self.get_localized_module_option('server_port', DEFAULT_PORT))
-                self.configure(server_addr, server_port)
-
-                # Wait for port to be available before starting
-                if not _wait_for_port_available(self.log, server_addr, server_port):
-                    self.log.warning(f'Port {server_port} still in use after waiting, attempting to start anyway')
-
-                try:
-                    cherrypy.engine.start()
-                    self.log.info('Engine restarted.')
-                except Exception as e:
-                    self.log.error(f'Failed to restart engine: {e}')
+                self.stop_adapter()
+
+                retries = 10
+                for attempt in range(retries):
+                    try:
+                        self.server_adapter = start_server()
+                        self.log.debug('Prometheus restarted successfully.')
+                        break
+                    except OSError as e:
+                        if e.errno == errno.EADDRINUSE:
+                            self.log.warning(f'Port still in use after config change (attempt {attempt + 1}/{retries}), retrying...')
+                            time.sleep(1)
+                        else:
+                            self.log.error(f'Failed to restart Prometheus (attempt {attempt + 1}/{retries}): {e}')
+                            self.stop_adapter()
+                            break
+                    except Exception as e:
+                        self.log.error(f'Failed to restart Prometheus (attempt {attempt + 1}/{retries}): {e}')
+                        self.stop_adapter()
+                        break
+                else:
+                    self.log.error('Failed to restart Prometheus after multiple attempts.')
+                    continue
 
         # Cleanup on shutdown
         self.shutdown_event.clear()
         # tell metrics collection thread to stop collecting new metrics
         self.metrics_thread.stop()
-        cherrypy.engine.stop()
-        cherrypy.server.httpserver = None
+        self.stop_adapter()
         self.log.info('Engine stopped.')
         self.shutdown_rbd_stats()
         # wait for the metrics collection thread to stop
@@ -2208,6 +2196,13 @@ class Module(MgrModule, OrchestratorClientMixin):
         self.log.info('Stopping engine...')
         self.shutdown_event.set()
 
+    def stop_adapter(self) -> None:
+        """
+        Stop and unsubscribe the current server adapter, if any, and drop the
+        'prometheus' registration from CherryPyMgr.
+
+        Safe to call repeatedly: the adapter reference is cleared once it has
+        been stopped, so a later call (for instance after a failed restart
+        attempt) does not stop or unsubscribe the stale adapter again.
+        """
+        adapter = getattr(self, 'server_adapter', None)
+        if adapter is not None:
+            adapter.stop()
+            adapter.unsubscribe()
+            # forget the stopped adapter so it is never stopped twice
+            self.server_adapter = None
+            self.log.info('Server adapter stopped.')
+        # unregistering is done even without an adapter, in case a failed
+        # start attempt left the name registered
+        CherryPyMgr.unregister('prometheus')
+
     @PrometheusCLICommand.Read('healthcheck history ls')
     def _list_healthchecks(self, format: Format = Format.plain) -> HandleCommandResult:
         """List all the healthchecks being tracked
@@ -2255,12 +2250,6 @@ class StandbyModule(MgrStandbyModule):
             'server_port', DEFAULT_PORT))
         self.log.info("server_addr: %s server_port: %s" %
                       (server_addr, server_port))
-        cherrypy.config.update({
-            'server.socket_host': server_addr,
-            'server.socket_port': server_port,
-            'engine.autoreload.on': False,
-            'request.show_tracebacks': False
-        })
 
         module = self
 
@@ -2286,23 +2275,40 @@ class StandbyModule(MgrStandbyModule):
             def metrics(self) -> str:
                 return ''
 
-        cherrypy.tree.mount(Root(), '/', {})
+        config = {
+            '/': {
+                'response.headers.server': 'Ceph-Prometheus',
+                'engine.autoreload.on': False,
+            }
+        }
+        tree = _cptree.Tree()
+        tree.mount(Root(), '/', config=config)
 
         # Wait for port to be available before starting
         if not _wait_for_port_available(self.log, server_addr, server_port):
             self.log.warning(f'Port {server_port} still in use after waiting, attempting to start anyway')
         self.log.info('Starting engine...')
-        cherrypy.engine.start()
+        self.server_adapter, _ = CherryPyMgr.mount(
+            tree,
+            'prometheus-standby',
+            (server_addr, int(server_port))
+        )
         self.log.info('Engine started.')
 
         # Wait for shutdown event
         self.shutdown_event.wait()
         self.shutdown_event.clear()
-        cherrypy.engine.stop()
-        cherrypy.server.httpserver = None
+        self.stop_adapter()
         self.log.info('Engine stopped.')
 
     def shutdown(self) -> None:
         self.log.info("Stopping engine...")
         self.shutdown_event.set()
         self.log.info("Stopped engine")
+
+    def stop_adapter(self) -> None:
+        """
+        Stop and unsubscribe the standby server adapter, if one was started,
+        and unregister 'prometheus-standby' from CherryPyMgr.
+
+        The adapter reference is cleared afterwards so that calling this
+        again does not stop or unsubscribe the same adapter a second time.
+        """
+        adapter = getattr(self, 'server_adapter', None)
+        if adapter is None:
+            return
+        adapter.stop()
+        adapter.unsubscribe()
+        # forget the stopped adapter so a repeated call is a no-op
+        self.server_adapter = None
+        CherryPyMgr.unregister('prometheus-standby')
+        self.log.info('Server adapter stopped.')
diff --git a/src/pybind/mgr/tests/test_cherrypy_mgr.py b/src/pybind/mgr/tests/test_cherrypy_mgr.py
new file mode 100644 (file)
index 0000000..3472eb0
--- /dev/null
@@ -0,0 +1,112 @@
+import unittest
+from unittest import mock
+import cherrypy
+import cherrypy_mgr
+
+from cherrypy_mgr import CherryPyMgr
+
+class TestCherryPyMgr(unittest.TestCase):
+    """Tests for CherryPyMgr with cherrypy's engine, config and server patched out."""
+
+    def setUp(self):
+        # every test starts from an empty tree registry
+        CherryPyMgr._trees = {}
+        self.patcher_engine = mock.patch('cherrypy_mgr.cherrypy.engine')
+        self.mock_engine = self.patcher_engine.start()
+        self.mock_engine.state = cherrypy.engine.states.STOPPED
+
+        self.patcher_config = mock.patch('cherrypy_mgr.cherrypy.config')
+        self.mock_config = self.patcher_config.start()
+
+        self.patcher_server = mock.patch('cherrypy_mgr.cherrypy.server')
+        self.mock_server = self.patcher_server.start()
+
+    def tearDown(self):
+        # stop each patcher exactly once
+        self.patcher_engine.stop()
+        self.patcher_config.stop()
+        self.patcher_server.stop()
+
+    @mock.patch('cherrypy_mgr.ServerAdapter')
+    @mock.patch('cherrypy_mgr.WSGIServer')
+    def test_mount(self, mock_wsgi_server, mock_server_adapter):
+        """Mounting registers the tree, starts the engine and starts the adapter."""
+        tree = mock.MagicMock(spec=cherrypy._cptree.Tree)
+        name = 'test_app'
+        bind_addr = ('127.0.0.1', 8080)
+        ssl_info = None
+
+        adapter, _ = CherryPyMgr.mount(tree, name, bind_addr, ssl_info)
+
+        self.assertIn(name, CherryPyMgr._trees)
+        self.assertEqual(CherryPyMgr._trees[name], tree)
+        self.mock_server.unsubscribe.assert_called_once()
+        self.mock_engine.autoreload.unsubscribe.assert_called_once()
+        self.mock_engine.start.assert_called_once()
+        mock_wsgi_server.assert_called_with(
+            bind_addr=bind_addr,
+            wsgi_app=tree,
+            numthreads=30,
+            server_name='Ceph-Mgr'
+        )
+        mock_server_adapter.return_value.start.assert_called_once()
+
+    @mock.patch('cherrypy_mgr.ServerAdapter')
+    @mock.patch('cherrypy_mgr.WSGIServer')
+    def test_mount_engine_already_started(self, mock_wsgi_server, mock_server_adapter):
+        """An engine that is already started must not be started again."""
+        self.mock_engine.state = cherrypy.engine.states.STARTED
+
+        tree = mock.MagicMock(spec=cherrypy._cptree.Tree)
+        name = 'another_app'
+        bind_addr = ('127.0.0.1', 8082)
+
+        adapter, _ = CherryPyMgr.mount(tree, name, bind_addr)
+
+        self.mock_engine.start.assert_not_called()
+        mock_server_adapter.return_value.start.assert_called_once()
+
+    @mock.patch('cherrypy_mgr.BuiltinSSLAdapter')
+    @mock.patch('cherrypy_mgr.ServerAdapter')
+    @mock.patch('cherrypy_mgr.WSGIServer')
+    def test_mount_with_ssl(self, mock_wsgi_server, mock_server_adapter, mock_builtin_ssl_adapter):
+        """With ssl_info, the server gets an SSL adapter using the given context."""
+        tree = mock.MagicMock(spec=cherrypy._cptree.Tree)
+        name = 'ssl_app'
+        bind_addr = ('127.0.0.1', 8080)
+        ssl_info = {
+            'cert': '/path/to/cert.pem',
+            'key': '/path/to/key.pem',
+            'context': 'fake_context'
+        }
+
+        CherryPyMgr.mount(tree, name, bind_addr, ssl_info)
+
+        mock_wsgi_server.assert_called_once()
+        server_instance = mock_wsgi_server.return_value
+
+        mock_builtin_ssl_adapter.assert_called_once_with(ssl_info['cert'], ssl_info['key'])
+        self.assertEqual(mock_builtin_ssl_adapter.return_value.context, 'fake_context')
+        self.assertEqual(server_instance.ssl_adapter, mock_builtin_ssl_adapter.return_value)
+
+    def test_get_server_config(self):
+        """Configs are found by mount point, with or without a trailing slash."""
+        tree = cherrypy._cptree.Tree()
+        app_one = mock.Mock()
+        app_one.config = {'id': 'app_one'}
+
+        app_two = mock.Mock()
+        app_two.config = {'id': 'app_two'}
+
+        tree.apps['/app_one'] = app_one
+        tree.apps['/app_two'] = app_two
+        CherryPyMgr._trees['test_app'] = tree
+
+        # get the config of app_two using different mount point formats
+        result = CherryPyMgr.get_server_config('test_app', '/app_two')
+        self.assertEqual(result, {'id': 'app_two'})
+        result = CherryPyMgr.get_server_config('test_app', '/app_two/')
+        self.assertEqual(result, {'id': 'app_two'})
+
+        # app_one is found under its own mount point
+        result = CherryPyMgr.get_server_config('test_app', '/app_one')
+        self.assertEqual(result, {'id': 'app_one'})
+
+        # nothing is mounted at the root of this tree, so '/' resolves to None
+        result = CherryPyMgr.get_server_config('test_app', '/')
+        self.assertIsNone(result)
+
+        # test non-existent app and mount point
+        self.assertIsNone(CherryPyMgr.get_server_config('ghost_app'))
+        self.assertIsNone(CherryPyMgr.get_server_config('test_app', '/missing'))
index c2deb627261ecb3f50da391ff5b68e90205ff88b..5cb6fffe11e791b7788b457f94dd0dfa1f2ab127 100644 (file)
@@ -150,6 +150,7 @@ modules =
     localpool \
     mgr_module.py \
     mgr_util.py \
+    cherrypy_mgr.py \
     nfs \
     object_format.py \
     orchestrator \
@@ -198,7 +199,7 @@ modules = smb
 [isort]
 profile = black
 line_length = 78
-known_first_party = ceph,rados,rbd,cephfs,mgr,mgr_module,mgr_util,object_format
+known_first_party = ceph,rados,rbd,cephfs,mgr,mgr_module,mgr_util,object_format,cherrypy_mgr
 known_typing = typing
 sections = FUTURE,TYPING,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER