From: Daniel Pivonka Date: Wed, 12 May 2021 17:46:11 +0000 (-0400) Subject: mgr/cephadm: allow monitoring stack services to bind to network and port X-Git-Tag: v17.1.0~1710^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F41444%2Fhead;p=ceph.git mgr/cephadm: allow monitoring stack services to bind to network and port Signed-off-by: Daniel Pivonka --- diff --git a/doc/cephadm/monitoring.rst b/doc/cephadm/monitoring.rst index 06d738ae1d8e..7066174a3d90 100644 --- a/doc/cephadm/monitoring.rst +++ b/doc/cephadm/monitoring.rst @@ -128,6 +128,24 @@ update its configuration: The ``reconfig`` command also sets the proper URL for Ceph Dashboard. +Networks and Ports +~~~~~~~~~~~~~~~~~~ + +All monitoring services can have the network and port they bind to configured with a YAML service specification. + +Example spec file: + +.. code-block:: yaml + + service_type: grafana + service_name: grafana + placement: + count: 1 + networks: + - 192.169.142.0/24 + spec: + port: 4200 + Using custom images ~~~~~~~~~~~~~~~~~~~ diff --git a/src/cephadm/cephadm b/src/cephadm/cephadm index 7e1df254da6b..55e8c834788e 100755 --- a/src/cephadm/cephadm +++ b/src/cephadm/cephadm @@ -280,7 +280,6 @@ class Monitoring(object): 'args': [ '--config.file=/etc/prometheus/prometheus.yml', '--storage.tsdb.path=/prometheus', - '--web.listen-address=:{}'.format(port_map['prometheus'][0]), ], 'config-json-files': [ 'prometheus.yml', @@ -311,7 +310,6 @@ class Monitoring(object): 'cpus': '2', 'memory': '2GB', 'args': [ - '--web.listen-address=:{}'.format(port_map['alertmanager'][0]), '--cluster.listen-address=:{}'.format(port_map['alertmanager'][1]), ], 'config-json-files': [ @@ -2133,6 +2131,17 @@ def get_daemon_args(ctx, fsid, daemon_type, daemon_id): elif daemon_type in Monitoring.components: metadata = Monitoring.components[daemon_type] r += metadata.get('args', list()) + # set ip and port to bind to for nodeexporter,alertmanager,prometheus + if daemon_type != 
'grafana': + ip = '' + port = Monitoring.port_map[daemon_type][0] + if 'meta_json' in ctx and ctx.meta_json: + meta = json.loads(ctx.meta_json) or {} + if 'ip' in meta and meta['ip']: + ip = meta['ip'] + if 'ports' in meta and meta['ports']: + port = meta['ports'][0] + r += [f'--web.listen-address={ip}:{port}'] if daemon_type == 'alertmanager': config = get_parm(ctx.config_json) peers = config.get('peers', list()) # type: ignore @@ -2932,15 +2941,7 @@ class Firewalld(object): def update_firewalld(ctx, daemon_type): # type: (CephadmContext, str) -> None firewall = Firewalld(ctx) - firewall.enable_service_for(daemon_type) - - fw_ports = [] - - if daemon_type in Monitoring.port_map.keys(): - fw_ports.extend(Monitoring.port_map[daemon_type]) # prometheus etc - - firewall.open_ports(fw_ports) firewall.apply_rules() @@ -4341,9 +4342,6 @@ def command_deploy(ctx): elif daemon_type in Monitoring.components: # monitoring daemon - prometheus, grafana, alertmanager, node-exporter # Default Checks - if not ctx.reconfig and not redeploy: - daemon_ports.extend(Monitoring.port_map[daemon_type]) - # make sure provided config-json is sufficient config = get_parm(ctx.config_json) # type: ignore required_files = Monitoring.components[daemon_type].get('config-json-files', list()) diff --git a/src/pybind/mgr/cephadm/services/monitoring.py b/src/pybind/mgr/cephadm/services/monitoring.py index f749f0559517..c32a1a1ee6bb 100644 --- a/src/pybind/mgr/cephadm/services/monitoring.py +++ b/src/pybind/mgr/cephadm/services/monitoring.py @@ -30,7 +30,9 @@ class GrafanaService(CephadmService): prom_services = [] # type: List[str] for dd in self.mgr.cache.get_daemons_by_service('prometheus'): assert dd.hostname is not None - prom_services.append(dd.hostname) + addr = dd.ip if dd.ip else self._inventory_get_addr(dd.hostname) + port = dd.ports[0] if dd.ports else 9095 + prom_services.append(addr + ':' + str(port)) deps.append(dd.name()) grafana_data_sources = self.mgr.template.render( 
'services/grafana/ceph-dashboard.yml.j2', {'hosts': prom_services}) @@ -53,7 +55,10 @@ class GrafanaService(CephadmService): }) grafana_ini = self.mgr.template.render( - 'services/grafana/grafana.ini.j2', {'http_port': self.DEFAULT_SERVICE_PORT}) + 'services/grafana/grafana.ini.j2', { + 'http_port': daemon_spec.ports[0] if daemon_spec.ports else self.DEFAULT_SERVICE_PORT, + 'http_addr': daemon_spec.ip if daemon_spec.ip else '' + }) config_file = { 'files': { @@ -76,8 +81,9 @@ class GrafanaService(CephadmService): # TODO: signed cert dd = self.get_active_daemon(daemon_descrs) assert dd.hostname is not None - service_url = 'https://{}:{}'.format( - self._inventory_get_addr(dd.hostname), self.DEFAULT_SERVICE_PORT) + addr = dd.ip if dd.ip else self._inventory_get_addr(dd.hostname) + port = dd.ports[0] if dd.ports else self.DEFAULT_SERVICE_PORT + service_url = 'https://{}:{}'.format(addr, port) self._set_service_url_on_dashboard( 'Grafana', 'dashboard get-grafana-api-url', @@ -170,8 +176,9 @@ class AlertmanagerService(CephadmService): def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None: dd = self.get_active_daemon(daemon_descrs) assert dd.hostname is not None - service_url = 'http://{}:{}'.format(self._inventory_get_addr(dd.hostname), - self.DEFAULT_SERVICE_PORT) + addr = dd.ip if dd.ip else self._inventory_get_addr(dd.hostname) + port = dd.ports[0] if dd.ports else self.DEFAULT_SERVICE_PORT + service_url = 'http://{}:{}'.format(addr, port) self._set_service_url_on_dashboard( 'AlertManager', 'dashboard get-alertmanager-api-host', @@ -232,10 +239,11 @@ class PrometheusService(CephadmService): for dd in self.mgr.cache.get_daemons_by_service('node-exporter'): assert dd.hostname is not None deps.append(dd.name()) - addr = self.mgr.inventory.get_addr(dd.hostname) + addr = dd.ip if dd.ip else self.mgr.inventory.get_addr(dd.hostname) + port = str(dd.ports[0]) if dd.ports else '9100' nodes.append({ 'hostname': dd.hostname, - 'url': addr.split(':')[0] + 
':9100' + 'url': addr.split(':')[0] + ':' + port }) # scrape alert managers @@ -243,8 +251,9 @@ class PrometheusService(CephadmService): for dd in self.mgr.cache.get_daemons_by_service('alertmanager'): assert dd.hostname is not None deps.append(dd.name()) - addr = self.mgr.inventory.get_addr(dd.hostname) - alertmgr_targets.append("'{}:9093'".format(addr.split(':')[0])) + addr = dd.ip if dd.ip else self.mgr.inventory.get_addr(dd.hostname) + port = str(dd.ports[0]) if dd.ports else '9093' + alertmgr_targets.append("'{}:{}'".format(addr.split(':')[0], port)) # scrape haproxies haproxy_targets = [] @@ -293,8 +302,9 @@ class PrometheusService(CephadmService): def config_dashboard(self, daemon_descrs: List[DaemonDescription]) -> None: dd = self.get_active_daemon(daemon_descrs) assert dd.hostname is not None - service_url = 'http://{}:{}'.format( - self._inventory_get_addr(dd.hostname), self.DEFAULT_SERVICE_PORT) + addr = dd.ip if dd.ip else self._inventory_get_addr(dd.hostname) + port = dd.ports[0] if dd.ports else self.DEFAULT_SERVICE_PORT + service_url = 'http://{}:{}'.format(addr, port) self._set_service_url_on_dashboard( 'Prometheus', 'dashboard get-prometheus-api-host', diff --git a/src/pybind/mgr/cephadm/templates/services/grafana/ceph-dashboard.yml.j2 b/src/pybind/mgr/cephadm/templates/services/grafana/ceph-dashboard.yml.j2 index c539cfc6f659..8946cac0a098 100644 --- a/src/pybind/mgr/cephadm/templates/services/grafana/ceph-dashboard.yml.j2 +++ b/src/pybind/mgr/cephadm/templates/services/grafana/ceph-dashboard.yml.j2 @@ -11,7 +11,7 @@ datasources: type: 'prometheus' access: 'proxy' orgId: 1 - url: 'http://{{ host }}:9095' + url: 'http://{{ host }}' basicAuth: false isDefault: {{ 'true' if loop.first else 'false' }} editable: false diff --git a/src/pybind/mgr/cephadm/templates/services/grafana/grafana.ini.j2 b/src/pybind/mgr/cephadm/templates/services/grafana/grafana.ini.j2 index 51aff3f9fb5c..3d6303f4466c 100644 --- 
a/src/pybind/mgr/cephadm/templates/services/grafana/grafana.ini.j2 +++ b/src/pybind/mgr/cephadm/templates/services/grafana/grafana.ini.j2 @@ -11,6 +11,7 @@ cert_file = /etc/grafana/certs/cert_file cert_key = /etc/grafana/certs/cert_key http_port = {{ http_port }} + http_addr = {{ http_addr }} [security] admin_user = admin admin_password = admin diff --git a/src/pybind/mgr/cephadm/tests/test_cephadm.py b/src/pybind/mgr/cephadm/tests/test_cephadm.py index 84c4cb5f6a6d..e84a5cd8c4e5 100644 --- a/src/pybind/mgr/cephadm/tests/test_cephadm.py +++ b/src/pybind/mgr/cephadm/tests/test_cephadm.py @@ -3,6 +3,8 @@ from contextlib import contextmanager import pytest +import yaml + from ceph.deployment.drive_group import DriveGroupSpec, DeviceSelection from cephadm.serve import CephadmServe from cephadm.services.osd import OSD, OSDRemovalQueue, OsdIdClaims @@ -298,6 +300,38 @@ class TestCephadm(object): + '"keyring": "", "files": {"config": "[mon.test]\\npublic network = 127.0.0.0/8\\n"}}', image='') + @mock.patch("cephadm.serve.CephadmServe._run_cephadm") + def test_monitoring_ports(self, _run_cephadm, cephadm_module: CephadmOrchestrator): + _run_cephadm.return_value = ('{}', '', 0) + + with with_host(cephadm_module, 'test'): + + yaml_str = """service_type: alertmanager +service_name: alertmanager +placement: + count: 1 +spec: + port: 4200 +""" + yaml_file = yaml.safe_load(yaml_str) + spec = ServiceSpec.from_json(yaml_file) + + with mock.patch("cephadm.services.monitoring.AlertmanagerService.generate_config", return_value=({}, [])): + with with_service(cephadm_module, spec): + + CephadmServe(cephadm_module)._check_daemons() + + _run_cephadm.assert_called_with( + 'test', 'alertmanager.test', 'deploy', [ + '--name', 'alertmanager.test', + '--meta-json', '{"service_name": "alertmanager", "ports": [4200, 9094], "ip": null, "deployed_by": [], "rank": null, "rank_generation": null}', + '--config-json', '-', + '--tcp-ports', '4200 9094', + '--reconfig' + ], + stdin='{}', + image='') + 
@mock.patch("cephadm.serve.CephadmServe._run_cephadm", _run_cephadm('{}')) def test_daemon_check_post(self, cephadm_module: CephadmOrchestrator): with with_host(cephadm_module, 'test'): diff --git a/src/python-common/ceph/deployment/service_spec.py b/src/python-common/ceph/deployment/service_spec.py index c9429bbbf2cd..2a4c098c344b 100644 --- a/src/python-common/ceph/deployment/service_spec.py +++ b/src/python-common/ceph/deployment/service_spec.py @@ -433,6 +433,9 @@ class ServiceSpec(object): 'alertmanager': AlertManagerSpec, 'ingress': IngressSpec, 'container': CustomContainerSpec, + 'grafana': MonitoringSpec, + 'node-exporter': MonitoringSpec, + 'prometheus': MonitoringSpec, }.get(service_type, cls) if ret == ServiceSpec and not service_type: raise SpecValidationError('Spec needs a "service_type" key.') @@ -852,6 +855,7 @@ class AlertManagerSpec(ServiceSpec): user_data: Optional[Dict[str, Any]] = None, config: Optional[Dict[str, str]] = None, networks: Optional[List[str]] = None, + port: Optional[int] = None, ): assert service_type == 'alertmanager' super(AlertManagerSpec, self).__init__( @@ -874,6 +878,23 @@ class AlertManagerSpec(ServiceSpec): # added to the default receivers' # configuration. 
self.user_data = user_data or {} + self.port = port + + def get_port_start(self) -> List[int]: + return [self.get_port(), 9094] + + def get_port(self) -> int: + if self.port: + return self.port + else: + return 9093 + + def validate(self) -> None: + super(AlertManagerSpec, self).validate() + + if self.port == 9094: + raise SpecValidationError( + 'Port 9094 is reserved for AlertManager cluster listen address') yaml.add_representer(AlertManagerSpec, ServiceSpec.yaml_representer) @@ -1017,3 +1038,37 @@ class CustomContainerSpec(ServiceSpec): yaml.add_representer(CustomContainerSpec, ServiceSpec.yaml_representer) + + +class MonitoringSpec(ServiceSpec): + def __init__(self, + service_type: str, + service_id: Optional[str] = None, + config: Optional[Dict[str, str]] = None, + networks: Optional[List[str]] = None, + placement: Optional[PlacementSpec] = None, + unmanaged: bool = False, + preview_only: bool = False, + port: Optional[int] = None, + ): + assert service_type in ['grafana', 'node-exporter', 'prometheus'] + + super(MonitoringSpec, self).__init__( + service_type, service_id, + placement=placement, unmanaged=unmanaged, + preview_only=preview_only, config=config, + networks=networks) + + self.service_type = service_type + self.port = port + + def get_port_start(self) -> List[int]: + return [self.get_port()] + + def get_port(self) -> int: + if self.port: + return self.port + else: + return {'prometheus': 9095, + 'node-exporter': 9100, + 'grafana': 3000}[self.service_type]