From: Shweta Bhosale Date: Fri, 29 Nov 2024 10:10:12 +0000 (+0530) Subject: mgr/cephadm: Add only_bind_ports_on_network spec parameter for Alertmanager X-Git-Tag: testing/wip-vshankar-testing-20250115.164543-debug~24^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=20cb0063ebd6ec50e70a2189ad9f554c056992ff;p=ceph-ci.git mgr/cephadm: Add only_bind_ports_on_network spec parameter for Alertmanager Fixes: https://tracker.ceph.com/issues/69070 Signed-off-by: Shweta Bhosale --- diff --git a/src/cephadm/cephadmlib/daemons/monitoring.py b/src/cephadm/cephadmlib/daemons/monitoring.py index 710093f0f46..4820b533be0 100644 --- a/src/cephadm/cephadmlib/daemons/monitoring.py +++ b/src/cephadm/cephadmlib/daemons/monitoring.py @@ -23,7 +23,13 @@ from ..daemon_form import register as register_daemon_form from ..daemon_identity import DaemonIdentity from ..deployment_utils import to_deployment_container from ..exceptions import Error -from ..net_utils import get_fqdn, get_hostname, get_ip_addresses, wrap_ipv6 +from ..net_utils import ( + get_fqdn, + get_hostname, + get_ip_addresses, + wrap_ipv6, + EndPoint, +) @register_daemon_form @@ -96,11 +102,6 @@ class Monitoring(ContainerDaemonForm): 'image': DEFAULT_ALERTMANAGER_IMAGE, 'cpus': '2', 'memory': '2GB', - 'args': [ - '--cluster.listen-address=:{}'.format( - port_map['alertmanager'][1] - ), - ], 'config-json-files': [ 'alertmanager.yml', ], @@ -255,11 +256,14 @@ class Monitoring(ContainerDaemonForm): ip = meta['ip'] if 'ports' in meta and meta['ports']: port = meta['ports'][0] - if daemon_type == 'prometheus': - config = fetch_configs(ctx) + config = fetch_configs(ctx) + if daemon_type in ['prometheus', 'alertmanager']: ip_to_bind_to = config.get('ip_to_bind_to', '') if ip_to_bind_to: ip = ip_to_bind_to + web_listen_addr = str(EndPoint(ip, port)) + r += [f'--web.listen-address={web_listen_addr}'] + if daemon_type == 'prometheus': retention_time = config.get('retention_time', '15d') retention_size = config.get( 'retention_size', '0' @@ -283,9 +287,11 @@ class Monitoring(ContainerDaemonForm): r += ['--web.route-prefix=/prometheus/'] else: r += [f'--web.external-url={scheme}://{host}:{port}'] - r += [f'--web.listen-address={ip}:{port}'] if daemon_type == 'alertmanager': - config = fetch_configs(ctx) + clus_listen_addr = str( + EndPoint(ip, self.port_map[daemon_type][1]) + ) + r += [f'--cluster.listen-address={clus_listen_addr}'] use_url_prefix = config.get('use_url_prefix', False) peers = config.get('peers', list()) # type: ignore for peer in peers: @@ -301,13 +307,11 @@ class Monitoring(ContainerDaemonForm): if daemon_type == 'promtail': r += ['--config.expand-env'] if daemon_type == 'prometheus': - config = fetch_configs(ctx) try: r += [f'--web.config.file={config["web_config"]}'] except KeyError: pass if daemon_type == 'node-exporter': - config = fetch_configs(ctx) try: r += [f'--web.config.file={config["web_config"]}'] except KeyError: diff --git a/src/cephadm/cephadmlib/net_utils.py b/src/cephadm/cephadmlib/net_utils.py index 9a7f138b1c6..bfa61d933ef 100644 --- a/src/cephadm/cephadmlib/net_utils.py +++ b/src/cephadm/cephadmlib/net_utils.py @@ -24,12 +24,22 @@ class EndPoint: def __init__(self, ip: str, port: int) -> None: self.ip = ip self.port = port + self.is_ipv4 = True + try: + if ip and ipaddress.ip_network(ip).version == 6: + self.is_ipv4 = False + except Exception: + logger.exception('Failed to check ip address version') def __str__(self) -> str: - return f'{self.ip}:{self.port}' + if self.is_ipv4: + return f'{self.ip}:{self.port}' + return f'[{self.ip}]:{self.port}' def __repr__(self) -> str: - return f'{self.ip}:{self.port}' + if self.is_ipv4: + return f'{self.ip}:{self.port}' + return f'[{self.ip}]:{self.port}' def attempt_bind(ctx, s, address, port): diff --git a/src/cephadm/tests/test_deploy.py b/src/cephadm/tests/test_deploy.py index c5094db335f..1736639ed55 100644 --- a/src/cephadm/tests/test_deploy.py +++ b/src/cephadm/tests/test_deploy.py @@ -316,7 +316,7 @@ def test_deploy_a_monitoring_container(cephadm_fs, funkypatch): runfile_lines = f.read().splitlines() assert 'podman' in runfile_lines[-1] assert runfile_lines[-1].endswith( - 'quay.io/titans/prometheus:latest --config.file=/etc/prometheus/prometheus.yml --storage.tsdb.path=/prometheus --storage.tsdb.retention.time=15d --storage.tsdb.retention.size=0 --web.external-url=http://10.10.10.10:9095 --web.listen-address=1.2.3.4:9095' + 'quay.io/titans/prometheus:latest --config.file=/etc/prometheus/prometheus.yml --storage.tsdb.path=/prometheus --web.listen-address=1.2.3.4:9095 --storage.tsdb.retention.time=15d --storage.tsdb.retention.size=0 --web.external-url=http://10.10.10.10:9095' ) assert '--user 8765' in runfile_lines[-1] assert f'-v /var/lib/ceph/{fsid}/prometheus.fire/etc/prometheus:/etc/prometheus:Z' in runfile_lines[-1] diff --git a/src/pybind/mgr/cephadm/schedule.py b/src/pybind/mgr/cephadm/schedule.py index 98d2fe99897..04d3712c50a 100644 --- a/src/pybind/mgr/cephadm/schedule.py +++ b/src/pybind/mgr/cephadm/schedule.py @@ -385,6 +385,8 @@ class HostAssignment(object): def find_ip_on_host(self, hostname: str, subnets: List[str]) -> Optional[str]: for subnet in subnets: + # to normalize subnet + subnet = str(ipaddress.ip_network(subnet)) ips: List[str] = [] # following is to allow loopback interfaces for both ipv4 and ipv6. Since we # only have the subnet (and no IP) we assume default loopback IP address. diff --git a/src/pybind/mgr/cephadm/services/monitoring.py b/src/pybind/mgr/cephadm/services/monitoring.py index 1b9cf618570..9c5b5a112f3 100644 --- a/src/pybind/mgr/cephadm/services/monitoring.py +++ b/src/pybind/mgr/cephadm/services/monitoring.py @@ -3,6 +3,7 @@ import logging import os import socket from typing import List, Any, Tuple, Dict, Optional, cast +import ipaddress from mgr_module import HandleCommandResult @@ -57,6 +58,8 @@ class GrafanaService(CephadmService): if ip_to_bind_to: daemon_spec.port_ips = {str(grafana_port): ip_to_bind_to} grafana_ip = ip_to_bind_to + if ipaddress.ip_network(grafana_ip).version == 6: + grafana_ip = f"[{grafana_ip}]" domain = self.mgr.get_fqdn(daemon_spec.host) mgmt_gw_ips = [] @@ -354,6 +357,13 @@ class AlertmanagerService(CephadmService): addr = self.mgr.get_fqdn(dd.hostname) peers.append(build_url(host=addr, port=port).lstrip('/')) + ip_to_bind_to = '' + if spec.only_bind_port_on_networks and spec.networks: + assert daemon_spec.host is not None + ip_to_bind_to = self.mgr.get_first_matching_network_ip(daemon_spec.host, spec) or '' + if ip_to_bind_to: + daemon_spec.port_ips = {str(port): ip_to_bind_to} + deps.append(f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}') if security_enabled: alertmanager_user, alertmanager_password = self.mgr._get_alertmanager_credentials() @@ -376,7 +386,8 @@ class AlertmanagerService(CephadmService): }, 'peers': peers, 'web_config': '/etc/alertmanager/web.yml', - 'use_url_prefix': mgmt_gw_enabled + 'use_url_prefix': mgmt_gw_enabled, + 'ip_to_bind_to': ip_to_bind_to }, sorted(deps) else: return { @@ -384,7 +395,8 @@ class AlertmanagerService(CephadmService): "alertmanager.yml": yml }, "peers": peers, - 'use_url_prefix': mgmt_gw_enabled + 'use_url_prefix': mgmt_gw_enabled, + 'ip_to_bind_to': ip_to_bind_to }, sorted(deps) def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription: diff --git a/src/pybind/mgr/cephadm/tests/test_services.py b/src/pybind/mgr/cephadm/tests/test_services.py index 75c7c3c5bf7..0276ba1eb30 100644 --- a/src/pybind/mgr/cephadm/tests/test_services.py +++ b/src/pybind/mgr/cephadm/tests/test_services.py @@ -575,7 +575,14 @@ class TestMonitoring: mock_getfqdn.return_value = purl.hostname with with_host(cephadm_module, "test"): - with with_service(cephadm_module, AlertManagerSpec()): + cephadm_module.cache.update_host_networks('test', { + '1.2.3.0/24': { + 'if0': ['1.2.3.1'] + }, + }) + with with_service(cephadm_module, AlertManagerSpec('alertmanager', + networks=['1.2.3.0/24'], + only_bind_port_on_networks=True)): y = dedent(self._get_config(expected_yaml_url)).lstrip() _run_cephadm.assert_called_with( 'test', @@ -589,11 +596,12 @@ class TestMonitoring: "deploy_arguments": [], "params": { 'tcp_ports': [9093, 9094], + 'port_ips': {"9094": "1.2.3.1"}, }, "meta": { 'service_name': 'alertmanager', 'ports': [9093, 9094], - 'ip': None, + 'ip': '1.2.3.1', 'deployed_by': [], 'rank': None, 'rank_generation': None, @@ -606,6 +614,7 @@ class TestMonitoring: }, "peers": [], "use_url_prefix": False, + "ip_to_bind_to": "1.2.3.1", } }), error_ok=True, @@ -628,8 +637,16 @@ class TestMonitoring: cephadm_module.secure_monitoring_stack = True cephadm_module.set_store(AlertmanagerService.USER_CFG_KEY, 'alertmanager_user') cephadm_module.set_store(AlertmanagerService.PASS_CFG_KEY, 'alertmanager_plain_password') + + cephadm_module.cache.update_host_networks('test', { + 'fd12:3456:789a::/64': { + 'if0': ['fd12:3456:789a::10'] + }, + }) with with_service(cephadm_module, MgmtGatewaySpec("mgmt-gateway")) as _, \ - with_service(cephadm_module, AlertManagerSpec()): + with_service(cephadm_module, AlertManagerSpec('alertmanager', + networks=['fd12:3456:789a::/64'], + only_bind_port_on_networks=True)): y = dedent(""" # This file is generated by cephadm. @@ -680,11 +697,12 @@ class TestMonitoring: "deploy_arguments": [], "params": { 'tcp_ports': [9093, 9094], + 'port_ips': {"9094": "fd12:3456:789a::10"} }, "meta": { 'service_name': 'alertmanager', 'ports': [9093, 9094], - 'ip': None, + 'ip': 'fd12:3456:789a::10', 'deployed_by': [], 'rank': None, 'rank_generation': None, @@ -702,6 +720,7 @@ class TestMonitoring: 'peers': [], 'web_config': '/etc/alertmanager/web.yml', "use_url_prefix": True, + "ip_to_bind_to": "fd12:3456:789a::10", } }), error_ok=True, @@ -795,6 +814,7 @@ class TestMonitoring: 'peers': [], 'web_config': '/etc/alertmanager/web.yml', "use_url_prefix": False, + "ip_to_bind_to": "", } }), error_ok=True, diff --git a/src/python-common/ceph/deployment/service_spec.py b/src/python-common/ceph/deployment/service_spec.py index 10e9f267940..8b81dd3dddc 100644 --- a/src/python-common/ceph/deployment/service_spec.py +++ b/src/python-common/ceph/deployment/service_spec.py @@ -2375,6 +2375,7 @@ class AlertManagerSpec(MonitoringSpec): user_data: Optional[Dict[str, Any]] = None, config: Optional[Dict[str, str]] = None, networks: Optional[List[str]] = None, + only_bind_port_on_networks: bool = False, port: Optional[int] = None, secure: bool = False, extra_container_args: Optional[GeneralArgList] = None, @@ -2405,6 +2406,7 @@ class AlertManagerSpec(MonitoringSpec): # configuration. self.user_data = user_data or {} self.secure = secure + self.only_bind_port_on_networks = only_bind_port_on_networks def get_port_start(self) -> List[int]: return [self.get_port(), 9094] @@ -2451,7 +2453,7 @@ class GrafanaSpec(MonitoringSpec): self.protocol = protocol # whether ports daemons for this service bind to should - # bind to only hte networks listed in networks param, or + # bind to only the networks listed in networks param, or # to all networks. Defaults to false which is saying to bind # on all networks. self.only_bind_port_on_networks = only_bind_port_on_networks