git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
mgr/cephadm: Add only_bind_port_on_networks spec parameter for Alertmanager
author: Shweta Bhosale <Shweta.Bhosale1@ibm.com>
Fri, 29 Nov 2024 10:10:12 +0000 (15:40 +0530)
committer: Shweta Bhosale <Shweta.Bhosale1@ibm.com>
Tue, 3 Dec 2024 12:09:44 +0000 (17:39 +0530)
Fixes: https://tracker.ceph.com/issues/69070
Signed-off-by: Shweta Bhosale <Shweta.Bhosale1@ibm.com>
src/cephadm/cephadmlib/daemons/monitoring.py
src/cephadm/cephadmlib/net_utils.py
src/cephadm/tests/test_deploy.py
src/pybind/mgr/cephadm/schedule.py
src/pybind/mgr/cephadm/services/monitoring.py
src/pybind/mgr/cephadm/tests/test_services.py
src/python-common/ceph/deployment/service_spec.py

index 710093f0f4674a5ed8f2ff8d8efc1d2c31606732..4820b533be02ff7fc733f932806a3ae4895dbca5 100644 (file)
@@ -23,7 +23,13 @@ from ..daemon_form import register as register_daemon_form
 from ..daemon_identity import DaemonIdentity
 from ..deployment_utils import to_deployment_container
 from ..exceptions import Error
-from ..net_utils import get_fqdn, get_hostname, get_ip_addresses, wrap_ipv6
+from ..net_utils import (
+    get_fqdn,
+    get_hostname,
+    get_ip_addresses,
+    wrap_ipv6,
+    EndPoint,
+)
 
 
 @register_daemon_form
@@ -96,11 +102,6 @@ class Monitoring(ContainerDaemonForm):
             'image': DEFAULT_ALERTMANAGER_IMAGE,
             'cpus': '2',
             'memory': '2GB',
-            'args': [
-                '--cluster.listen-address=:{}'.format(
-                    port_map['alertmanager'][1]
-                ),
-            ],
             'config-json-files': [
                 'alertmanager.yml',
             ],
@@ -255,11 +256,14 @@ class Monitoring(ContainerDaemonForm):
                     ip = meta['ip']
                 if 'ports' in meta and meta['ports']:
                     port = meta['ports'][0]
-            if daemon_type == 'prometheus':
-                config = fetch_configs(ctx)
+            config = fetch_configs(ctx)
+            if daemon_type in ['prometheus', 'alertmanager']:
                 ip_to_bind_to = config.get('ip_to_bind_to', '')
                 if ip_to_bind_to:
                     ip = ip_to_bind_to
+                web_listen_addr = str(EndPoint(ip, port))
+                r += [f'--web.listen-address={web_listen_addr}']
+            if daemon_type == 'prometheus':
                 retention_time = config.get('retention_time', '15d')
                 retention_size = config.get(
                     'retention_size', '0'
@@ -283,9 +287,11 @@ class Monitoring(ContainerDaemonForm):
                     r += ['--web.route-prefix=/prometheus/']
                 else:
                     r += [f'--web.external-url={scheme}://{host}:{port}']
-            r += [f'--web.listen-address={ip}:{port}']
         if daemon_type == 'alertmanager':
-            config = fetch_configs(ctx)
+            clus_listen_addr = str(
+                EndPoint(ip, self.port_map[daemon_type][1])
+            )
+            r += [f'--cluster.listen-address={clus_listen_addr}']
             use_url_prefix = config.get('use_url_prefix', False)
             peers = config.get('peers', list())  # type: ignore
             for peer in peers:
@@ -301,13 +307,11 @@ class Monitoring(ContainerDaemonForm):
         if daemon_type == 'promtail':
             r += ['--config.expand-env']
         if daemon_type == 'prometheus':
-            config = fetch_configs(ctx)
             try:
                 r += [f'--web.config.file={config["web_config"]}']
             except KeyError:
                 pass
         if daemon_type == 'node-exporter':
-            config = fetch_configs(ctx)
             try:
                 r += [f'--web.config.file={config["web_config"]}']
             except KeyError:
index 9a7f138b1c6f8558a9495223b16fcc2cff04abc7..bfa61d933ef5501ea50f015a91139d0ae86769e6 100644 (file)
@@ -24,12 +24,22 @@ class EndPoint:
     def __init__(self, ip: str, port: int) -> None:
         self.ip = ip
         self.port = port
+        self.is_ipv4 = True
+        try:
+            if ip and ipaddress.ip_network(ip).version == 6:
+                self.is_ipv4 = False
+        except Exception:
+            logger.exception('Failed to check ip address version')
 
     def __str__(self) -> str:
-        return f'{self.ip}:{self.port}'
+        if self.is_ipv4:
+            return f'{self.ip}:{self.port}'
+        return f'[{self.ip}]:{self.port}'
 
     def __repr__(self) -> str:
-        return f'{self.ip}:{self.port}'
+        if self.is_ipv4:
+            return f'{self.ip}:{self.port}'
+        return f'[{self.ip}]:{self.port}'
 
 
 def attempt_bind(ctx, s, address, port):
index c5094db335fd68fa45e78bb09ba4688d80f6f099..1736639ed552708eac3b7d180c6a64bb2ea3d79a 100644 (file)
@@ -316,7 +316,7 @@ def test_deploy_a_monitoring_container(cephadm_fs, funkypatch):
         runfile_lines = f.read().splitlines()
     assert 'podman' in runfile_lines[-1]
     assert runfile_lines[-1].endswith(
-        'quay.io/titans/prometheus:latest --config.file=/etc/prometheus/prometheus.yml --storage.tsdb.path=/prometheus --storage.tsdb.retention.time=15d --storage.tsdb.retention.size=0 --web.external-url=http://10.10.10.10:9095 --web.listen-address=1.2.3.4:9095'
+        'quay.io/titans/prometheus:latest --config.file=/etc/prometheus/prometheus.yml --storage.tsdb.path=/prometheus --web.listen-address=1.2.3.4:9095 --storage.tsdb.retention.time=15d --storage.tsdb.retention.size=0 --web.external-url=http://10.10.10.10:9095'
     )
     assert '--user 8765' in runfile_lines[-1]
     assert f'-v /var/lib/ceph/{fsid}/prometheus.fire/etc/prometheus:/etc/prometheus:Z' in runfile_lines[-1]
index 98d2fe99897eca6ffc14876bd41673f886083c28..04d3712c50ac4cbb30a1b725cb9b9314a0e6fbe4 100644 (file)
@@ -385,6 +385,8 @@ class HostAssignment(object):
 
     def find_ip_on_host(self, hostname: str, subnets: List[str]) -> Optional[str]:
         for subnet in subnets:
+            # to normalize subnet
+            subnet = str(ipaddress.ip_network(subnet))
             ips: List[str] = []
             # following is to allow loopback interfaces for both ipv4 and ipv6. Since we
             # only have the subnet (and no IP) we assume default loopback IP address.
index 1b9cf6185708e028e5cc8b30cf7df659784be57b..9c5b5a112f336b6aab2573d332ab1268991d72a0 100644 (file)
@@ -3,6 +3,7 @@ import logging
 import os
 import socket
 from typing import List, Any, Tuple, Dict, Optional, cast
+import ipaddress
 
 from mgr_module import HandleCommandResult
 
@@ -57,6 +58,8 @@ class GrafanaService(CephadmService):
             if ip_to_bind_to:
                 daemon_spec.port_ips = {str(grafana_port): ip_to_bind_to}
                 grafana_ip = ip_to_bind_to
+                if ipaddress.ip_network(grafana_ip).version == 6:
+                    grafana_ip = f"[{grafana_ip}]"
 
         domain = self.mgr.get_fqdn(daemon_spec.host)
         mgmt_gw_ips = []
@@ -354,6 +357,13 @@ class AlertmanagerService(CephadmService):
             addr = self.mgr.get_fqdn(dd.hostname)
             peers.append(build_url(host=addr, port=port).lstrip('/'))
 
+        ip_to_bind_to = ''
+        if spec.only_bind_port_on_networks and spec.networks:
+            assert daemon_spec.host is not None
+            ip_to_bind_to = self.mgr.get_first_matching_network_ip(daemon_spec.host, spec) or ''
+            if ip_to_bind_to:
+                daemon_spec.port_ips = {str(port): ip_to_bind_to}
+
         deps.append(f'secure_monitoring_stack:{self.mgr.secure_monitoring_stack}')
         if security_enabled:
             alertmanager_user, alertmanager_password = self.mgr._get_alertmanager_credentials()
@@ -376,7 +386,8 @@ class AlertmanagerService(CephadmService):
                 },
                 'peers': peers,
                 'web_config': '/etc/alertmanager/web.yml',
-                'use_url_prefix': mgmt_gw_enabled
+                'use_url_prefix': mgmt_gw_enabled,
+                'ip_to_bind_to': ip_to_bind_to
             }, sorted(deps)
         else:
             return {
@@ -384,7 +395,8 @@ class AlertmanagerService(CephadmService):
                     "alertmanager.yml": yml
                 },
                 "peers": peers,
-                'use_url_prefix': mgmt_gw_enabled
+                'use_url_prefix': mgmt_gw_enabled,
+                'ip_to_bind_to': ip_to_bind_to
             }, sorted(deps)
 
     def get_active_daemon(self, daemon_descrs: List[DaemonDescription]) -> DaemonDescription:
index 75c7c3c5bf79e322ccf39d1ea6dc4186fb922cf3..0276ba1eb300edf6b1cc095792bd3db0c5017637 100644 (file)
@@ -575,7 +575,14 @@ class TestMonitoring:
         mock_getfqdn.return_value = purl.hostname
 
         with with_host(cephadm_module, "test"):
-            with with_service(cephadm_module, AlertManagerSpec()):
+            cephadm_module.cache.update_host_networks('test', {
+                '1.2.3.0/24': {
+                    'if0': ['1.2.3.1']
+                },
+            })
+            with with_service(cephadm_module, AlertManagerSpec('alertmanager',
+                                                               networks=['1.2.3.0/24'],
+                                                               only_bind_port_on_networks=True)):
                 y = dedent(self._get_config(expected_yaml_url)).lstrip()
                 _run_cephadm.assert_called_with(
                     'test',
@@ -589,11 +596,12 @@ class TestMonitoring:
                         "deploy_arguments": [],
                         "params": {
                             'tcp_ports': [9093, 9094],
+                            'port_ips': {"9094": "1.2.3.1"},
                         },
                         "meta": {
                             'service_name': 'alertmanager',
                             'ports': [9093, 9094],
-                            'ip': None,
+                            'ip': '1.2.3.1',
                             'deployed_by': [],
                             'rank': None,
                             'rank_generation': None,
@@ -606,6 +614,7 @@ class TestMonitoring:
                             },
                             "peers": [],
                             "use_url_prefix": False,
+                            "ip_to_bind_to": "1.2.3.1",
                         }
                     }),
                     error_ok=True,
@@ -628,8 +637,16 @@ class TestMonitoring:
             cephadm_module.secure_monitoring_stack = True
             cephadm_module.set_store(AlertmanagerService.USER_CFG_KEY, 'alertmanager_user')
             cephadm_module.set_store(AlertmanagerService.PASS_CFG_KEY, 'alertmanager_plain_password')
+
+            cephadm_module.cache.update_host_networks('test', {
+                'fd12:3456:789a::/64': {
+                    'if0': ['fd12:3456:789a::10']
+                },
+            })
             with with_service(cephadm_module, MgmtGatewaySpec("mgmt-gateway")) as _, \
-                 with_service(cephadm_module, AlertManagerSpec()):
+                 with_service(cephadm_module, AlertManagerSpec('alertmanager',
+                                                               networks=['fd12:3456:789a::/64'],
+                                                               only_bind_port_on_networks=True)):
 
                 y = dedent("""
                 # This file is generated by cephadm.
@@ -680,11 +697,12 @@ class TestMonitoring:
                         "deploy_arguments": [],
                         "params": {
                             'tcp_ports': [9093, 9094],
+                            'port_ips': {"9094": "fd12:3456:789a::10"}
                         },
                         "meta": {
                             'service_name': 'alertmanager',
                             'ports': [9093, 9094],
-                            'ip': None,
+                            'ip': 'fd12:3456:789a::10',
                             'deployed_by': [],
                             'rank': None,
                             'rank_generation': None,
@@ -702,6 +720,7 @@ class TestMonitoring:
                             'peers': [],
                             'web_config': '/etc/alertmanager/web.yml',
                             "use_url_prefix": True,
+                            "ip_to_bind_to": "fd12:3456:789a::10",
                         }
                     }),
                     error_ok=True,
@@ -795,6 +814,7 @@ class TestMonitoring:
                             'peers': [],
                             'web_config': '/etc/alertmanager/web.yml',
                             "use_url_prefix": False,
+                            "ip_to_bind_to": "",
                         }
                     }),
                     error_ok=True,
index 10e9f26794000faf228f2f1044a466442198e6d7..8b81dd3dddc3a86f20de2d5bc25b1629d523378f 100644 (file)
@@ -2375,6 +2375,7 @@ class AlertManagerSpec(MonitoringSpec):
                  user_data: Optional[Dict[str, Any]] = None,
                  config: Optional[Dict[str, str]] = None,
                  networks: Optional[List[str]] = None,
+                 only_bind_port_on_networks: bool = False,
                  port: Optional[int] = None,
                  secure: bool = False,
                  extra_container_args: Optional[GeneralArgList] = None,
@@ -2405,6 +2406,7 @@ class AlertManagerSpec(MonitoringSpec):
         #                        <webhook_configs> configuration.
         self.user_data = user_data or {}
         self.secure = secure
+        self.only_bind_port_on_networks = only_bind_port_on_networks
 
     def get_port_start(self) -> List[int]:
         return [self.get_port(), 9094]
@@ -2451,7 +2453,7 @@ class GrafanaSpec(MonitoringSpec):
         self.protocol = protocol
 
         # whether ports daemons for this service bind to should
-        # bind to only hte networks listed in networks param, or
+        # bind to only the networks listed in networks param, or
         # to all networks. Defaults to false which is saying to bind
         # on all networks.
         self.only_bind_port_on_networks = only_bind_port_on_networks