git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/cephadm: moving service_discovery.py to services module
author Redouane Kachach <rkachach@ibm.com>
Tue, 15 Jul 2025 14:21:41 +0000 (16:21 +0200)
committer Redouane Kachach <rkachach@ibm.com>
Thu, 7 Aug 2025 14:39:46 +0000 (16:39 +0200)
Signed-off-by: Redouane Kachach <rkachach@ibm.com>
src/pybind/mgr/cephadm/http_server.py
src/pybind/mgr/cephadm/service_discovery.py [deleted file]
src/pybind/mgr/cephadm/services/service_discovery.py [new file with mode: 0644]
src/pybind/mgr/cephadm/tests/test_service_discovery.py

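diff --git a/src/pybind/mgr/cephadm/http_server.py b/src/pybind/mgr/cephadm/http_server.py
index efeb54e8a24f23696e3cf37a0aa424c270a7d815..baa00a3eb5ac833a6af788989e12c60a653ebb0a 100644 (file)
--- a/src/pybind/mgr/cephadm/http_server.py
+++ b/src/pybind/mgr/cephadm/http_server.py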
@@ -4,7 +4,7 @@ import logging
 from typing import TYPE_CHECKING
 
 from cephadm.agent import AgentEndpoint
-from cephadm.service_discovery import ServiceDiscovery
+from cephadm.services.service_discovery import ServiceDiscovery
 from mgr_util import test_port_allocation, PortAlreadyInUse
 from orchestrator import OrchestratorError
 
diff --git a/src/pybind/mgr/cephadm/service_discovery.py b/src/pybind/mgr/cephadm/service_discovery.py
deleted file mode 100644 (file)
index d823f67..0000000
--- a/src/pybind/mgr/cephadm/service_discovery.py
+++ /dev/null
@@ -1,316 +0,0 @@
-try:
-    import cherrypy
-    from cherrypy._cpserver import Server
-except ImportError:
-    # to avoid sphinx build crash
-    class Server:  # type: ignore
-        pass
-
-import logging
-
-import orchestrator  # noqa
-from mgr_util import build_url
-from typing import Dict, List, TYPE_CHECKING, cast, Collection, Callable, NamedTuple, Optional, IO
-from cephadm.services.nfs import NFSService
-from cephadm.services.monitoring import AlertmanagerService, NodeExporterService, PrometheusService
-import secrets
-from mgr_util import verify_tls_files
-import tempfile
-
-from cephadm.services.ingress import IngressSpec
-from cephadm.services.cephadmservice import CephExporterService
-from cephadm.services.nvmeof import NvmeofService
-from cephadm.services.service_registry import service_registry
-
-from ceph.deployment.service_spec import SMBSpec
-
-if TYPE_CHECKING:
-    from cephadm.module import CephadmOrchestrator
-
-
-def cherrypy_filter(record: logging.LogRecord) -> bool:
-    blocked = [
-        'TLSV1_ALERT_DECRYPT_ERROR'
-    ]
-    msg = record.getMessage()
-    return not any([m for m in blocked if m in msg])
-
-
-logging.getLogger('cherrypy.error').addFilter(cherrypy_filter)
-cherrypy.log.access_log.propagate = False
-logger = logging.getLogger(__name__)
-
-
-class Route(NamedTuple):
-    name: str
-    route: str
-    controller: Callable
-
-
-class ServiceDiscovery:
-
-    def __init__(self, mgr: "CephadmOrchestrator") -> None:
-        self.mgr = mgr
-        self.username: Optional[str] = None
-        self.password: Optional[str] = None
-        self.key_file: IO[bytes]
-        self.cert_file: IO[bytes]
-
-    def validate_password(self, realm: str, username: str, password: str) -> bool:
-        return (password == self.password and username == self.username)
-
-    def configure_routes(self, server: Server, enable_auth: bool) -> None:
-        ROUTES = [
-            Route('index', '/', server.index),
-            Route('sd-config', '/prometheus/sd-config', server.get_sd_config),
-            Route('rules', '/prometheus/rules', server.get_prometheus_rules),
-        ]
-        d = cherrypy.dispatch.RoutesDispatcher()
-        for route in ROUTES:
-            d.connect(**route._asdict())
-        if enable_auth:
-            conf = {
-                '/': {
-                    'request.dispatch': d,
-                    'tools.auth_basic.on': True,
-                    'tools.auth_basic.realm': 'localhost',
-                    'tools.auth_basic.checkpassword': self.validate_password
-                }
-            }
-        else:
-            conf = {'/': {'request.dispatch': d}}
-        cherrypy.tree.mount(None, '/sd', config=conf)
-
-    def enable_auth(self) -> None:
-        self.username = self.mgr.get_store('service_discovery/root/username')
-        self.password = self.mgr.get_store('service_discovery/root/password')
-        if not self.password or not self.username:
-            self.username = 'admin'  # TODO(redo): what should be the default username
-            self.password = secrets.token_urlsafe(20)
-            self.mgr.set_store('service_discovery/root/password', self.password)
-            self.mgr.set_store('service_discovery/root/username', self.username)
-
-    def configure_tls(self, server: Server) -> None:
-        addr = self.mgr.get_mgr_ip()
-        host = self.mgr.get_hostname()
-        cert, key = self.mgr.cert_mgr.generate_cert(host, addr)
-        self.cert_file = tempfile.NamedTemporaryFile()
-        self.cert_file.write(cert.encode('utf-8'))
-        self.cert_file.flush()  # cert_tmp must not be gc'ed
-
-        self.key_file = tempfile.NamedTemporaryFile()
-        self.key_file.write(key.encode('utf-8'))
-        self.key_file.flush()  # pkey_tmp must not be gc'ed
-
-        verify_tls_files(self.cert_file.name, self.key_file.name)
-
-        server.ssl_certificate, server.ssl_private_key = self.cert_file.name, self.key_file.name
-
-    def configure(self, port: int, addr: str, enable_security: bool) -> None:
-        # we create a new server to enforce TLS/SSL config refresh
-        self.root_server = Root(self.mgr, port, addr)
-        self.root_server.ssl_certificate = None
-        self.root_server.ssl_private_key = None
-        if enable_security:
-            self.enable_auth()
-            self.configure_tls(self.root_server)
-        self.configure_routes(self.root_server, enable_security)
-
-
-class Root(Server):
-
-    # collapse everything to '/'
-    def _cp_dispatch(self, vpath: str) -> 'Root':
-        cherrypy.request.path = ''
-        return self
-
-    def stop(self) -> None:
-        # we must call unsubscribe before stopping the server,
-        # otherwise the port is not released and we will get
-        # an exception when trying to restart it
-        self.unsubscribe()
-        super().stop()
-
-    def __init__(self, mgr: "CephadmOrchestrator", port: int = 0, host: str = ''):
-        self.mgr = mgr
-        super().__init__()
-        self.socket_port = port
-        self.socket_host = host
-        self.subscribe()
-
-    @cherrypy.expose
-    def index(self) -> str:
-        return self.mgr.get_store('service_discovery/index') or '''<!DOCTYPE html>
-<html>
-<head><title>Cephadm HTTP Endpoint</title></head>
-<body>
-<h2>Cephadm Service Discovery Endpoints</h2>
-<p><a href='prometheus/sd-config?service=mgr-prometheus'>mgr/Prometheus http sd-config</a></p>
-<p><a href='prometheus/sd-config?service=alertmanager'>Alertmanager http sd-config</a></p>
-<p><a href='prometheus/sd-config?service=node-exporter'>Node exporter http sd-config</a></p>
-<p><a href='prometheus/sd-config?service=haproxy'>HAProxy http sd-config</a></p>
-<p><a href='prometheus/sd-config?service=ceph-exporter'>Ceph exporter http sd-config</a></p>
-<p><a href='prometheus/sd-config?service=nvmeof'>NVMeoF http sd-config</a></p>
-<p><a href='prometheus/sd-config?service=nfs'>NFS http sd-config</a></p>
-<p><a href='prometheus/sd-config?service=smb'>SMB http sd-config</a></p>
-<p><a href='prometheus/rules'>Prometheus rules</a></p>
-</body>
-</html>'''
-
-    @cherrypy.expose
-    @cherrypy.tools.json_out()
-    def get_sd_config(self, service: str) -> List[Dict[str, Collection[str]]]:
-        """Return <http_sd_config> compatible prometheus config for the specified service."""
-        if service == 'mgr-prometheus':
-            return self.prometheus_sd_config()
-        elif service == 'alertmanager':
-            return self.alertmgr_sd_config()
-        elif service == 'node-exporter':
-            return self.node_exporter_sd_config()
-        elif service == 'haproxy':
-            return self.haproxy_sd_config()
-        elif service == 'ceph-exporter':
-            return self.ceph_exporter_sd_config()
-        elif service == 'nvmeof':
-            return self.nvmeof_sd_config()
-        elif service == 'nfs':
-            return self.nfs_sd_config()
-        elif service == 'smb':
-            return self.smb_sd_config()
-        elif service.startswith("container"):
-            return self.container_sd_config(service)
-        else:
-            return []
-
-    def prometheus_sd_config(self) -> List[Dict[str, Collection[str]]]:
-        """Return <http_sd_config> compatible prometheus config for prometheus service.
-        Targets should be a length one list containing only the active mgr
-        """
-        targets = []
-        mgr_daemons = self.mgr.cache.get_daemons_by_service('mgr')
-        host = service_registry.get_service('mgr').get_active_daemon(mgr_daemons).hostname or ''
-        fqdn = self.mgr.get_fqdn(host)
-        port = self.mgr.get_module_option_ex(
-            'prometheus', 'server_port', PrometheusService.DEFAULT_MGR_PROMETHEUS_PORT)
-        targets.append(f'{fqdn}:{port}')
-        return [{"targets": targets, "labels": {}}]
-
-    def alertmgr_sd_config(self) -> List[Dict[str, Collection[str]]]:
-        """Return <http_sd_config> compatible prometheus config for mgr alertmanager service."""
-        srv_entries = []
-        for dd in self.mgr.cache.get_daemons_by_service('alertmanager'):
-            assert dd.hostname is not None
-            addr = dd.ip if dd.ip else self.mgr.inventory.get_addr(dd.hostname)
-            port = dd.ports[0] if dd.ports else AlertmanagerService.DEFAULT_SERVICE_PORT
-            srv_entries.append('{}'.format(build_url(host=addr, port=port).lstrip('/')))
-        return [{"targets": srv_entries, "labels": {}}]
-
-    def node_exporter_sd_config(self) -> List[Dict[str, Collection[str]]]:
-        """Return <http_sd_config> compatible prometheus config for node-exporter service."""
-        srv_entries = []
-        for dd in self.mgr.cache.get_daemons_by_service('node-exporter'):
-            assert dd.hostname is not None
-            addr = dd.ip if dd.ip else self.mgr.inventory.get_addr(dd.hostname)
-            port = dd.ports[0] if dd.ports else NodeExporterService.DEFAULT_SERVICE_PORT
-            srv_entries.append({
-                'targets': [build_url(host=addr, port=port).lstrip('/')],
-                'labels': {'instance': dd.hostname}
-            })
-        return srv_entries
-
-    def haproxy_sd_config(self) -> List[Dict[str, Collection[str]]]:
-        """Return <http_sd_config> compatible prometheus config for haproxy service."""
-        srv_entries = []
-        for dd in self.mgr.cache.get_daemons_by_type('ingress'):
-            if dd.service_name() in self.mgr.spec_store:
-                spec = cast(IngressSpec, self.mgr.spec_store[dd.service_name()].spec)
-                assert dd.hostname is not None
-                if dd.daemon_type == 'haproxy':
-                    addr = self.mgr.inventory.get_addr(dd.hostname)
-                    srv_entries.append({
-                        'targets': [f"{build_url(host=addr, port=spec.monitor_port).lstrip('/')}"],
-                        'labels': {'ingress': dd.service_name(), 'instance': dd.hostname}
-                    })
-        return srv_entries
-
-    def ceph_exporter_sd_config(self) -> List[Dict[str, Collection[str]]]:
-        """Return <http_sd_config> compatible prometheus config for ceph-exporter service."""
-        srv_entries = []
-        for dd in self.mgr.cache.get_daemons_by_service('ceph-exporter'):
-            assert dd.hostname is not None
-            addr = dd.ip if dd.ip else self.mgr.inventory.get_addr(dd.hostname)
-            port = dd.ports[0] if dd.ports else CephExporterService.DEFAULT_SERVICE_PORT
-            srv_entries.append({
-                'targets': [build_url(host=addr, port=port).lstrip('/')],
-                'labels': {'instance': dd.hostname}
-            })
-        return srv_entries
-
-    def nvmeof_sd_config(self) -> List[Dict[str, Collection[str]]]:
-        """Return <http_sd_config> compatible prometheus config for nvmeof service."""
-        srv_entries = []
-        for dd in self.mgr.cache.get_daemons_by_type('nvmeof'):
-            assert dd.hostname is not None
-            addr = dd.ip if dd.ip else self.mgr.inventory.get_addr(dd.hostname)
-            port = NvmeofService.PROMETHEUS_PORT
-            srv_entries.append({
-                'targets': [build_url(host=addr, port=port).lstrip('/')],
-                'labels': {'instance': dd.hostname}
-            })
-        return srv_entries
-
-    def nfs_sd_config(self) -> List[Dict[str, Collection[str]]]:
-        """Return <http_sd_config> compatible prometheus config for nfs service."""
-        srv_entries = []
-        for dd in self.mgr.cache.get_daemons_by_type('nfs'):
-            assert dd.hostname is not None
-            nfs = cast(NFSService, service_registry.get_service('nfs'))
-            monitoring_ip, monitoring_port = nfs.get_monitoring_details(dd.service_name(), dd.hostname)
-            addr = monitoring_ip or dd.ip or self.mgr.inventory.get_addr(dd.hostname)
-            port = monitoring_port or NFSService.DEFAULT_EXPORTER_PORT
-            srv_entries.append({
-                'targets': [build_url(host=addr, port=port).lstrip('/')],
-                'labels': {'instance': dd.hostname}
-            })
-        return srv_entries
-
-    def smb_sd_config(self) -> List[Dict[str, Collection[str]]]:
-        """Return <http_sd_config> compatible prometheus config for smb service."""
-        srv_entries = []
-        for dd in self.mgr.cache.get_daemons_by_type('smb'):
-            assert dd.hostname is not None
-            try:
-                spec = cast(SMBSpec, self.mgr.spec_store[dd.service_name()].spec)
-            except KeyError:
-                logger.warning("no spec found for %s", dd.service_name())
-                continue
-            # TODO: needs updating once ip control/colocation is present
-            addr = dd.ip if dd.ip else self.mgr.inventory.get_addr(dd.hostname)
-            port = spec.metrics_exporter_port()
-            srv_entries.append({
-                'targets': [build_url(host=addr, port=port).lstrip('/')],
-                'labels': {'instance': dd.hostname}
-            })
-        return srv_entries
-
-    def container_sd_config(self, service: str) -> List[Dict[str, Collection[str]]]:
-        """Return <http_sd_config> compatible prometheus config for a container service."""
-        srv_entries = []
-        for dd in self.mgr.cache.get_daemons_by_service(service):
-            assert dd.hostname is not None
-            addr = dd.ip if dd.ip else self.mgr.inventory.get_addr(dd.hostname)
-            if not dd.ports:
-                continue
-            port = dd.ports[0]
-            srv_entries.append({
-                'targets': [build_url(host=addr, port=port).lstrip('/')],
-                'labels': {'instance': dd.hostname}
-            })
-        return srv_entries
-
-    @cherrypy.expose(alias='prometheus/rules')
-    def get_prometheus_rules(self) -> str:
-        """Return currently configured prometheus rules as Yaml."""
-        cherrypy.response.headers['Content-Type'] = 'text/plain'
-        with open(self.mgr.prometheus_alerts_path, 'r', encoding='utf-8') as f:
-            return f.read()
diff --git a/src/pybind/mgr/cephadm/services/service_discovery.py b/src/pybind/mgr/cephadm/services/service_discovery.py
new file mode 100644 (file)
index 0000000..d823f67
--- /dev/null
+++ b/src/pybind/mgr/cephadm/services/service_discovery.py
@@ -0,0 +1,316 @@
+try:
+    import cherrypy
+    from cherrypy._cpserver import Server
+except ImportError:
+    # to avoid sphinx build crash
+    class Server:  # type: ignore
+        pass
+
+import logging
+
+import orchestrator  # noqa
+from mgr_util import build_url
+from typing import Dict, List, TYPE_CHECKING, cast, Collection, Callable, NamedTuple, Optional, IO
+from cephadm.services.nfs import NFSService
+from cephadm.services.monitoring import AlertmanagerService, NodeExporterService, PrometheusService
+import secrets
+from mgr_util import verify_tls_files
+import tempfile
+
+from cephadm.services.ingress import IngressSpec
+from cephadm.services.cephadmservice import CephExporterService
+from cephadm.services.nvmeof import NvmeofService
+from cephadm.services.service_registry import service_registry
+
+from ceph.deployment.service_spec import SMBSpec
+
+if TYPE_CHECKING:
+    from cephadm.module import CephadmOrchestrator
+
+
+def cherrypy_filter(record: logging.LogRecord) -> bool:
+    blocked = [
+        'TLSV1_ALERT_DECRYPT_ERROR'
+    ]
+    msg = record.getMessage()
+    return not any([m for m in blocked if m in msg])
+
+
+logging.getLogger('cherrypy.error').addFilter(cherrypy_filter)
+cherrypy.log.access_log.propagate = False
+logger = logging.getLogger(__name__)
+
+
+class Route(NamedTuple):
+    name: str
+    route: str
+    controller: Callable
+
+
+class ServiceDiscovery:
+
+    def __init__(self, mgr: "CephadmOrchestrator") -> None:
+        self.mgr = mgr
+        self.username: Optional[str] = None
+        self.password: Optional[str] = None
+        self.key_file: IO[bytes]
+        self.cert_file: IO[bytes]
+
+    def validate_password(self, realm: str, username: str, password: str) -> bool:
+        return (password == self.password and username == self.username)
+
+    def configure_routes(self, server: Server, enable_auth: bool) -> None:
+        ROUTES = [
+            Route('index', '/', server.index),
+            Route('sd-config', '/prometheus/sd-config', server.get_sd_config),
+            Route('rules', '/prometheus/rules', server.get_prometheus_rules),
+        ]
+        d = cherrypy.dispatch.RoutesDispatcher()
+        for route in ROUTES:
+            d.connect(**route._asdict())
+        if enable_auth:
+            conf = {
+                '/': {
+                    'request.dispatch': d,
+                    'tools.auth_basic.on': True,
+                    'tools.auth_basic.realm': 'localhost',
+                    'tools.auth_basic.checkpassword': self.validate_password
+                }
+            }
+        else:
+            conf = {'/': {'request.dispatch': d}}
+        cherrypy.tree.mount(None, '/sd', config=conf)
+
+    def enable_auth(self) -> None:
+        self.username = self.mgr.get_store('service_discovery/root/username')
+        self.password = self.mgr.get_store('service_discovery/root/password')
+        if not self.password or not self.username:
+            self.username = 'admin'  # TODO(redo): what should be the default username
+            self.password = secrets.token_urlsafe(20)
+            self.mgr.set_store('service_discovery/root/password', self.password)
+            self.mgr.set_store('service_discovery/root/username', self.username)
+
+    def configure_tls(self, server: Server) -> None:
+        addr = self.mgr.get_mgr_ip()
+        host = self.mgr.get_hostname()
+        cert, key = self.mgr.cert_mgr.generate_cert(host, addr)
+        self.cert_file = tempfile.NamedTemporaryFile()
+        self.cert_file.write(cert.encode('utf-8'))
+        self.cert_file.flush()  # cert_tmp must not be gc'ed
+
+        self.key_file = tempfile.NamedTemporaryFile()
+        self.key_file.write(key.encode('utf-8'))
+        self.key_file.flush()  # pkey_tmp must not be gc'ed
+
+        verify_tls_files(self.cert_file.name, self.key_file.name)
+
+        server.ssl_certificate, server.ssl_private_key = self.cert_file.name, self.key_file.name
+
+    def configure(self, port: int, addr: str, enable_security: bool) -> None:
+        # we create a new server to enforce TLS/SSL config refresh
+        self.root_server = Root(self.mgr, port, addr)
+        self.root_server.ssl_certificate = None
+        self.root_server.ssl_private_key = None
+        if enable_security:
+            self.enable_auth()
+            self.configure_tls(self.root_server)
+        self.configure_routes(self.root_server, enable_security)
+
+
+class Root(Server):
+
+    # collapse everything to '/'
+    def _cp_dispatch(self, vpath: str) -> 'Root':
+        cherrypy.request.path = ''
+        return self
+
+    def stop(self) -> None:
+        # we must call unsubscribe before stopping the server,
+        # otherwise the port is not released and we will get
+        # an exception when trying to restart it
+        self.unsubscribe()
+        super().stop()
+
+    def __init__(self, mgr: "CephadmOrchestrator", port: int = 0, host: str = ''):
+        self.mgr = mgr
+        super().__init__()
+        self.socket_port = port
+        self.socket_host = host
+        self.subscribe()
+
+    @cherrypy.expose
+    def index(self) -> str:
+        return self.mgr.get_store('service_discovery/index') or '''<!DOCTYPE html>
+<html>
+<head><title>Cephadm HTTP Endpoint</title></head>
+<body>
+<h2>Cephadm Service Discovery Endpoints</h2>
+<p><a href='prometheus/sd-config?service=mgr-prometheus'>mgr/Prometheus http sd-config</a></p>
+<p><a href='prometheus/sd-config?service=alertmanager'>Alertmanager http sd-config</a></p>
+<p><a href='prometheus/sd-config?service=node-exporter'>Node exporter http sd-config</a></p>
+<p><a href='prometheus/sd-config?service=haproxy'>HAProxy http sd-config</a></p>
+<p><a href='prometheus/sd-config?service=ceph-exporter'>Ceph exporter http sd-config</a></p>
+<p><a href='prometheus/sd-config?service=nvmeof'>NVMeoF http sd-config</a></p>
+<p><a href='prometheus/sd-config?service=nfs'>NFS http sd-config</a></p>
+<p><a href='prometheus/sd-config?service=smb'>SMB http sd-config</a></p>
+<p><a href='prometheus/rules'>Prometheus rules</a></p>
+</body>
+</html>'''
+
+    @cherrypy.expose
+    @cherrypy.tools.json_out()
+    def get_sd_config(self, service: str) -> List[Dict[str, Collection[str]]]:
+        """Return <http_sd_config> compatible prometheus config for the specified service."""
+        if service == 'mgr-prometheus':
+            return self.prometheus_sd_config()
+        elif service == 'alertmanager':
+            return self.alertmgr_sd_config()
+        elif service == 'node-exporter':
+            return self.node_exporter_sd_config()
+        elif service == 'haproxy':
+            return self.haproxy_sd_config()
+        elif service == 'ceph-exporter':
+            return self.ceph_exporter_sd_config()
+        elif service == 'nvmeof':
+            return self.nvmeof_sd_config()
+        elif service == 'nfs':
+            return self.nfs_sd_config()
+        elif service == 'smb':
+            return self.smb_sd_config()
+        elif service.startswith("container"):
+            return self.container_sd_config(service)
+        else:
+            return []
+
+    def prometheus_sd_config(self) -> List[Dict[str, Collection[str]]]:
+        """Return <http_sd_config> compatible prometheus config for prometheus service.
+        Targets should be a length one list containing only the active mgr
+        """
+        targets = []
+        mgr_daemons = self.mgr.cache.get_daemons_by_service('mgr')
+        host = service_registry.get_service('mgr').get_active_daemon(mgr_daemons).hostname or ''
+        fqdn = self.mgr.get_fqdn(host)
+        port = self.mgr.get_module_option_ex(
+            'prometheus', 'server_port', PrometheusService.DEFAULT_MGR_PROMETHEUS_PORT)
+        targets.append(f'{fqdn}:{port}')
+        return [{"targets": targets, "labels": {}}]
+
+    def alertmgr_sd_config(self) -> List[Dict[str, Collection[str]]]:
+        """Return <http_sd_config> compatible prometheus config for mgr alertmanager service."""
+        srv_entries = []
+        for dd in self.mgr.cache.get_daemons_by_service('alertmanager'):
+            assert dd.hostname is not None
+            addr = dd.ip if dd.ip else self.mgr.inventory.get_addr(dd.hostname)
+            port = dd.ports[0] if dd.ports else AlertmanagerService.DEFAULT_SERVICE_PORT
+            srv_entries.append('{}'.format(build_url(host=addr, port=port).lstrip('/')))
+        return [{"targets": srv_entries, "labels": {}}]
+
+    def node_exporter_sd_config(self) -> List[Dict[str, Collection[str]]]:
+        """Return <http_sd_config> compatible prometheus config for node-exporter service."""
+        srv_entries = []
+        for dd in self.mgr.cache.get_daemons_by_service('node-exporter'):
+            assert dd.hostname is not None
+            addr = dd.ip if dd.ip else self.mgr.inventory.get_addr(dd.hostname)
+            port = dd.ports[0] if dd.ports else NodeExporterService.DEFAULT_SERVICE_PORT
+            srv_entries.append({
+                'targets': [build_url(host=addr, port=port).lstrip('/')],
+                'labels': {'instance': dd.hostname}
+            })
+        return srv_entries
+
+    def haproxy_sd_config(self) -> List[Dict[str, Collection[str]]]:
+        """Return <http_sd_config> compatible prometheus config for haproxy service."""
+        srv_entries = []
+        for dd in self.mgr.cache.get_daemons_by_type('ingress'):
+            if dd.service_name() in self.mgr.spec_store:
+                spec = cast(IngressSpec, self.mgr.spec_store[dd.service_name()].spec)
+                assert dd.hostname is not None
+                if dd.daemon_type == 'haproxy':
+                    addr = self.mgr.inventory.get_addr(dd.hostname)
+                    srv_entries.append({
+                        'targets': [f"{build_url(host=addr, port=spec.monitor_port).lstrip('/')}"],
+                        'labels': {'ingress': dd.service_name(), 'instance': dd.hostname}
+                    })
+        return srv_entries
+
+    def ceph_exporter_sd_config(self) -> List[Dict[str, Collection[str]]]:
+        """Return <http_sd_config> compatible prometheus config for ceph-exporter service."""
+        srv_entries = []
+        for dd in self.mgr.cache.get_daemons_by_service('ceph-exporter'):
+            assert dd.hostname is not None
+            addr = dd.ip if dd.ip else self.mgr.inventory.get_addr(dd.hostname)
+            port = dd.ports[0] if dd.ports else CephExporterService.DEFAULT_SERVICE_PORT
+            srv_entries.append({
+                'targets': [build_url(host=addr, port=port).lstrip('/')],
+                'labels': {'instance': dd.hostname}
+            })
+        return srv_entries
+
+    def nvmeof_sd_config(self) -> List[Dict[str, Collection[str]]]:
+        """Return <http_sd_config> compatible prometheus config for nvmeof service."""
+        srv_entries = []
+        for dd in self.mgr.cache.get_daemons_by_type('nvmeof'):
+            assert dd.hostname is not None
+            addr = dd.ip if dd.ip else self.mgr.inventory.get_addr(dd.hostname)
+            port = NvmeofService.PROMETHEUS_PORT
+            srv_entries.append({
+                'targets': [build_url(host=addr, port=port).lstrip('/')],
+                'labels': {'instance': dd.hostname}
+            })
+        return srv_entries
+
+    def nfs_sd_config(self) -> List[Dict[str, Collection[str]]]:
+        """Return <http_sd_config> compatible prometheus config for nfs service."""
+        srv_entries = []
+        for dd in self.mgr.cache.get_daemons_by_type('nfs'):
+            assert dd.hostname is not None
+            nfs = cast(NFSService, service_registry.get_service('nfs'))
+            monitoring_ip, monitoring_port = nfs.get_monitoring_details(dd.service_name(), dd.hostname)
+            addr = monitoring_ip or dd.ip or self.mgr.inventory.get_addr(dd.hostname)
+            port = monitoring_port or NFSService.DEFAULT_EXPORTER_PORT
+            srv_entries.append({
+                'targets': [build_url(host=addr, port=port).lstrip('/')],
+                'labels': {'instance': dd.hostname}
+            })
+        return srv_entries
+
+    def smb_sd_config(self) -> List[Dict[str, Collection[str]]]:
+        """Return <http_sd_config> compatible prometheus config for smb service."""
+        srv_entries = []
+        for dd in self.mgr.cache.get_daemons_by_type('smb'):
+            assert dd.hostname is not None
+            try:
+                spec = cast(SMBSpec, self.mgr.spec_store[dd.service_name()].spec)
+            except KeyError:
+                logger.warning("no spec found for %s", dd.service_name())
+                continue
+            # TODO: needs updating once ip control/colocation is present
+            addr = dd.ip if dd.ip else self.mgr.inventory.get_addr(dd.hostname)
+            port = spec.metrics_exporter_port()
+            srv_entries.append({
+                'targets': [build_url(host=addr, port=port).lstrip('/')],
+                'labels': {'instance': dd.hostname}
+            })
+        return srv_entries
+
+    def container_sd_config(self, service: str) -> List[Dict[str, Collection[str]]]:
+        """Return <http_sd_config> compatible prometheus config for a container service."""
+        srv_entries = []
+        for dd in self.mgr.cache.get_daemons_by_service(service):
+            assert dd.hostname is not None
+            addr = dd.ip if dd.ip else self.mgr.inventory.get_addr(dd.hostname)
+            if not dd.ports:
+                continue
+            port = dd.ports[0]
+            srv_entries.append({
+                'targets': [build_url(host=addr, port=port).lstrip('/')],
+                'labels': {'instance': dd.hostname}
+            })
+        return srv_entries
+
+    @cherrypy.expose(alias='prometheus/rules')
+    def get_prometheus_rules(self) -> str:
+        """Return currently configured prometheus rules as Yaml."""
+        cherrypy.response.headers['Content-Type'] = 'text/plain'
+        with open(self.mgr.prometheus_alerts_path, 'r', encoding='utf-8') as f:
+            return f.read()
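diff --git a/src/pybind/mgr/cephadm/tests/test_service_discovery.py b/src/pybind/mgr/cephadm/tests/test_service_discovery.py
index fd0eaff35b9a135d981d82c067a07d83cdb0d99d..32e20256626daf71708af1dfc1de2226384ace76 100644 (file)
--- a/src/pybind/mgr/cephadm/tests/test_service_discovery.py
+++ b/src/pybind/mgr/cephadm/tests/test_service_discovery.py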
@@ -1,5 +1,5 @@
 from unittest.mock import MagicMock
-from cephadm.service_discovery import Root
+from cephadm.services.service_discovery import Root
 from cephadm.services.service_registry import service_registry