git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/cephadm: allow monitoring stack services to bind to network and port
author Daniel Pivonka <dpivonka@redhat.com>
Wed, 12 May 2021 17:46:11 +0000 (13:46 -0400)
committer Sebastian Wagner <sewagner@redhat.com>
Fri, 11 Jun 2021 09:51:09 +0000 (11:51 +0200)
Signed-off-by: Daniel Pivonka <dpivonka@redhat.com>
(cherry picked from commit adecb2d2dad5732d610d53181e41d195819bdbb8)

doc/cephadm/monitoring.rst
src/cephadm/cephadm
src/pybind/mgr/cephadm/services/monitoring.py
src/pybind/mgr/cephadm/templates/services/grafana/ceph-dashboard.yml.j2
src/pybind/mgr/cephadm/templates/services/grafana/grafana.ini.j2
src/pybind/mgr/cephadm/tests/test_cephadm.py
src/python-common/ceph/deployment/service_spec.py

index 1c50cca5bb35a2a5103aa509422e4b22e1568624..8bf120a16e3c362b8b5ca59868eb3b1011ae4c98 100644 (file)
@@ -145,6 +145,24 @@ configuration to be updated.
 The ``reconfig`` command also takes care of setting the right URL for Ceph
 Dashboard.
 
+Networks and Ports
+~~~~~~~~~~~~~~~~~~
+
+All monitoring services can have the network and port they bind to configured with a YAML service specification.
+
+Example spec file:
+
+.. code-block:: yaml
+
+    service_type: grafana
+    service_name: grafana
+    placement:
+      count: 1
+    networks:
+    - 192.169.142.0/24
+    spec:
+      port: 4200
+
 Using custom images
 ~~~~~~~~~~~~~~~~~~~
 
index c26efc26a605d6677c765a646633975dd958fba8..107f70565211677df44896a5ca0414269ca46264 100755 (executable)
@@ -279,7 +279,6 @@ class Monitoring(object):
             'args': [
                 '--config.file=/etc/prometheus/prometheus.yml',
                 '--storage.tsdb.path=/prometheus',
-                '--web.listen-address=:{}'.format(port_map['prometheus'][0]),
             ],
             'config-json-files': [
                 'prometheus.yml',
@@ -310,7 +309,6 @@ class Monitoring(object):
             'cpus': '2',
             'memory': '2GB',
             'args': [
-                '--web.listen-address=:{}'.format(port_map['alertmanager'][0]),
                 '--cluster.listen-address=:{}'.format(port_map['alertmanager'][1]),
             ],
             'config-json-files': [
@@ -2121,6 +2119,17 @@ def get_daemon_args(ctx, fsid, daemon_type, daemon_id):
     elif daemon_type in Monitoring.components:
         metadata = Monitoring.components[daemon_type]
         r += metadata.get('args', list())
+        # set ip and port to bind to for nodeexporter,alertmanager,prometheus
+        if daemon_type != 'grafana':
+            ip = ''
+            port = Monitoring.port_map[daemon_type][0]
+            if 'meta_json' in ctx and ctx.meta_json:
+                meta = json.loads(ctx.meta_json) or {}
+                if 'ip' in meta and meta['ip']:
+                    ip = meta['ip']
+                if 'ports' in meta and meta['ports']:
+                    port = meta['ports'][0]
+            r += [f'--web.listen-address={ip}:{port}']
         if daemon_type == 'alertmanager':
             config = get_parm(ctx.config_json)
             peers = config.get('peers', list())  # type: ignore
@@ -2917,15 +2926,7 @@ class Firewalld(object):
 def update_firewalld(ctx, daemon_type):
     # type: (CephadmContext, str) -> None
     firewall = Firewalld(ctx)
-
     firewall.enable_service_for(daemon_type)
-
-    fw_ports = []
-
-    if daemon_type in Monitoring.port_map.keys():
-        fw_ports.extend(Monitoring.port_map[daemon_type])  # prometheus etc
-
-    firewall.open_ports(fw_ports)
     firewall.apply_rules()
 
 
@@ -4318,9 +4319,6 @@ def command_deploy(ctx):
     elif daemon_type in Monitoring.components:
         # monitoring daemon - prometheus, grafana, alertmanager, node-exporter
         # Default Checks
-        if not ctx.reconfig and not redeploy:
-            daemon_ports.extend(Monitoring.port_map[daemon_type])
-
         # make sure provided config-json is sufficient
         config = get_parm(ctx.config_json)  # type: ignore
         required_files = Monitoring.components[daemon_type].get('config-json-files', list())
index f749f0559517885f4f38f086af5c83f26fd74221..c32a1a1ee6bbd366d7b3217e481ea549b515e1a0 100644 (file)
@@ -30,7 +30,9 @@ class GrafanaService(CephadmService):
         prom_services = []  # type: List[str]
         for dd in self.mgr.cache.get_daemons_by_service('prometheus'):
             assert dd.hostname is not None
-            prom_services.append(dd.hostname)
+            addr = dd.ip if dd.ip else self._inventory_get_addr(dd.hostname)
+            port = dd.ports[0] if dd.ports else 9095
+            prom_services.append(addr + ':' + str(port))
             deps.append(dd.name())
         grafana_data_sources = self.mgr.template.render(
             'services/grafana/ceph-dashboard.yml.j2', {'hosts': prom_services})
@@ -53,7 +55,10 @@ class GrafanaService(CephadmService):
             })
 
         grafana_ini = self.mgr.template.render(
-            'services/grafana/grafana.ini.j2', {'http_port': self.DEFAULT_SERVICE_PORT})
+            'services/grafana/grafana.ini.j2', {
+                'http_port': daemon_spec.ports[0] if daemon_spec.ports else self.DEFAULT_SERVICE_PORT,
+                'http_addr': daemon_spec.ip if daemon_spec.ip else ''
+            })
 
         config_file = {
             'files': {
@@ -76,8 +81,9 @@ class GrafanaService(CephadmService):
         # TODO: signed cert
         dd = self.get_active_daemon(daemon_descrs)
         assert dd.hostname is not None
-        service_url = 'https://{}:{}'.format(
-            self._inventory_get_addr(dd.hostname), self.DEFAULT_SERVICE_PORT)
+        addr = dd.ip if dd.ip else self._inventory_get_addr(dd.hostname)
+        port = dd.ports[0] if dd.ports else self.DEFAULT_SERVICE_PORT
+        service_url = 'https://{}:{}'.format(addr, port)
         self._set_service_url_on_dashboard(
             'Grafana',
             'dashboard get-grafana-api-url',
@@ -170,8 +176,9 @@ class AlertmanagerService(CephadmService):
     def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None:
         dd = self.get_active_daemon(daemon_descrs)
         assert dd.hostname is not None
-        service_url = 'http://{}:{}'.format(self._inventory_get_addr(dd.hostname),
-                                            self.DEFAULT_SERVICE_PORT)
+        addr = dd.ip if dd.ip else self._inventory_get_addr(dd.hostname)
+        port = dd.ports[0] if dd.ports else self.DEFAULT_SERVICE_PORT
+        service_url = 'http://{}:{}'.format(addr, port)
         self._set_service_url_on_dashboard(
             'AlertManager',
             'dashboard get-alertmanager-api-host',
@@ -232,10 +239,11 @@ class PrometheusService(CephadmService):
         for dd in self.mgr.cache.get_daemons_by_service('node-exporter'):
             assert dd.hostname is not None
             deps.append(dd.name())
-            addr = self.mgr.inventory.get_addr(dd.hostname)
+            addr = dd.ip if dd.ip else self.mgr.inventory.get_addr(dd.hostname)
+            port = str(dd.ports[0]) if dd.ports else '9100'
             nodes.append({
                 'hostname': dd.hostname,
-                'url': addr.split(':')[0] + ':9100'
+                'url': addr.split(':')[0] + ':' + port
             })
 
         # scrape alert managers
@@ -243,8 +251,9 @@ class PrometheusService(CephadmService):
         for dd in self.mgr.cache.get_daemons_by_service('alertmanager'):
             assert dd.hostname is not None
             deps.append(dd.name())
-            addr = self.mgr.inventory.get_addr(dd.hostname)
-            alertmgr_targets.append("'{}:9093'".format(addr.split(':')[0]))
+            addr = dd.ip if dd.ip else self.mgr.inventory.get_addr(dd.hostname)
+            port = str(dd.ports[0]) if dd.ports else '9093'
+            alertmgr_targets.append("'{}:{}'".format(addr.split(':')[0], port))
 
         # scrape haproxies
         haproxy_targets = []
@@ -293,8 +302,9 @@ class PrometheusService(CephadmService):
     def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None:
         dd = self.get_active_daemon(daemon_descrs)
         assert dd.hostname is not None
-        service_url = 'http://{}:{}'.format(
-            self._inventory_get_addr(dd.hostname), self.DEFAULT_SERVICE_PORT)
+        addr = dd.ip if dd.ip else self._inventory_get_addr(dd.hostname)
+        port = dd.ports[0] if dd.ports else self.DEFAULT_SERVICE_PORT
+        service_url = 'http://{}:{}'.format(addr, port)
         self._set_service_url_on_dashboard(
             'Prometheus',
             'dashboard get-prometheus-api-host',
index c539cfc6f65909a0ed2e639786bad30213ea53fe..8946cac0a09895c919636bd57179d0644c55cbca 100644 (file)
@@ -11,7 +11,7 @@ datasources:
     type: 'prometheus'
     access: 'proxy'
     orgId: 1
-    url: 'http://{{ host }}:9095'
+    url: 'http://{{ host }}'
     basicAuth: false
     isDefault: {{ 'true' if loop.first else 'false' }}
     editable: false
index 51aff3f9fb5cb1b1eef79550f7e3e8f8aa33031a..3d6303f4466c00acb406eed95386750879f4c0f8 100644 (file)
@@ -11,6 +11,7 @@
   cert_file = /etc/grafana/certs/cert_file
   cert_key = /etc/grafana/certs/cert_key
   http_port = {{ http_port }}
+  http_addr = {{ http_addr }}
 [security]
   admin_user = admin
   admin_password = admin
index 84c4cb5f6a6d360a2a1d8099e0f1f5712740439e..e84a5cd8c4e54cf1aca7014b92cfef2b21e63b61 100644 (file)
@@ -3,6 +3,8 @@ from contextlib import contextmanager
 
 import pytest
 
+import yaml
+
 from ceph.deployment.drive_group import DriveGroupSpec, DeviceSelection
 from cephadm.serve import CephadmServe
 from cephadm.services.osd import OSD, OSDRemovalQueue, OsdIdClaims
@@ -298,6 +300,38 @@ class TestCephadm(object):
                     + '"keyring": "", "files": {"config": "[mon.test]\\npublic network = 127.0.0.0/8\\n"}}',
                     image='')
 
+    @mock.patch("cephadm.serve.CephadmServe._run_cephadm")
+    def test_monitoring_ports(self, _run_cephadm, cephadm_module: CephadmOrchestrator):
+        _run_cephadm.return_value = ('{}', '', 0)
+
+        with with_host(cephadm_module, 'test'):
+
+            yaml_str = """service_type: alertmanager
+service_name: alertmanager
+placement:
+    count: 1
+spec:
+    port: 4200
+"""
+            yaml_file = yaml.safe_load(yaml_str)
+            spec = ServiceSpec.from_json(yaml_file)
+
+            with mock.patch("cephadm.services.monitoring.AlertmanagerService.generate_config", return_value=({}, [])):
+                with with_service(cephadm_module, spec):
+
+                    CephadmServe(cephadm_module)._check_daemons()
+
+                    _run_cephadm.assert_called_with(
+                        'test', 'alertmanager.test', 'deploy', [
+                            '--name', 'alertmanager.test',
+                            '--meta-json', '{"service_name": "alertmanager", "ports": [4200, 9094], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null}',
+                            '--config-json', '-',
+                            '--tcp-ports', '4200 9094',
+                            '--reconfig'
+                        ],
+                        stdin='{}',
+                        image='')
+
     @mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}'))
     def test_daemon_check_post(self, cephadm_module: CephadmOrchestrator):
         with with_host(cephadm_module, 'test'):
index c9429bbbf2cddabdf6d16b3be83b6deb0bb833ce..2a4c098c344bf776978b55c4870730874bf5db89 100644 (file)
@@ -433,6 +433,9 @@ class ServiceSpec(object):
             'alertmanager': AlertManagerSpec,
             'ingress': IngressSpec,
             'container': CustomContainerSpec,
+            'grafana': MonitoringSpec,
+            'node-exporter': MonitoringSpec,
+            'prometheus': MonitoringSpec,
         }.get(service_type, cls)
         if ret == ServiceSpec and not service_type:
             raise SpecValidationError('Spec needs a "service_type" key.')
@@ -852,6 +855,7 @@ class AlertManagerSpec(ServiceSpec):
                  user_data: Optional[Dict[str, Any]] = None,
                  config: Optional[Dict[str, str]] = None,
                  networks: Optional[List[str]] = None,
+                 port: Optional[int] = None,
                  ):
         assert service_type == 'alertmanager'
         super(AlertManagerSpec, self).__init__(
@@ -874,6 +878,23 @@ class AlertManagerSpec(ServiceSpec):
         #                        added to the default receivers'
         #                        <webhook_configs> configuration.
         self.user_data = user_data or {}
+        self.port = port
+
+    def get_port_start(self) -> List[int]:
+        return [self.get_port(), 9094]
+
+    def get_port(self) -> int:
+        if self.port:
+            return self.port
+        else:
+            return 9093
+
+    def validate(self) -> None:
+        super(AlertManagerSpec, self).validate()
+
+        if self.port == 9094:
+            raise SpecValidationError(
+                'Port 9094 is reserved for AlertManager cluster listen address')
 
 
 yaml.add_representer(AlertManagerSpec, ServiceSpec.yaml_representer)
@@ -1017,3 +1038,37 @@ class CustomContainerSpec(ServiceSpec):
 
 
 yaml.add_representer(CustomContainerSpec, ServiceSpec.yaml_representer)
+
+
+class MonitoringSpec(ServiceSpec):
+    def __init__(self,
+                 service_type: str,
+                 service_id: Optional[str] = None,
+                 config: Optional[Dict[str, str]] = None,
+                 networks: Optional[List[str]] = None,
+                 placement: Optional[PlacementSpec] = None,
+                 unmanaged: bool = False,
+                 preview_only: bool = False,
+                 port: Optional[int] = None,
+                 ):
+        assert service_type in ['grafana', 'node-exporter', 'prometheus']
+
+        super(MonitoringSpec, self).__init__(
+            service_type, service_id,
+            placement=placement, unmanaged=unmanaged,
+            preview_only=preview_only, config=config,
+            networks=networks)
+
+        self.service_type = service_type
+        self.port = port
+
+    def get_port_start(self) -> List[int]:
+        return [self.get_port()]
+
+    def get_port(self) -> int:
+        if self.port:
+            return self.port
+        else:
+            return {'prometheus': 9095,
+                    'node-exporter': 9100,
+                    'grafana': 3000}[self.service_type]