git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/cephadm: return IP addresses in prometheus_sd_config [67339/head]
author kginon <kobi.ginon.ext@nokia.com>
Tue, 3 Mar 2026 16:39:34 +0000 (18:39 +0200)
committer Kobi Ginon <kginon@redhat.com>
Mon, 16 Mar 2026 15:40:59 +0000 (17:40 +0200)
Align prometheus service-discovery to return IP addresses for the
manager target, following the same pattern used for alertmanager
and node-exporter.

This avoids discovery failures in environments where the Prometheus
host cannot resolve the FQDNs of the Ceph cluster nodes.

Fixes: https://tracker.ceph.com/issues/74658
Signed-off-by: Kobi Ginon <kginon@redhat.com>
src/pybind/mgr/cephadm/services/service_discovery.py
src/pybind/mgr/cephadm/tests/test_service_discovery.py

index e874f90fe2ac95c6e68b33fe6fef5707c81e72de..1fef54ceb7fdb6d8e8a657c1cdf5c745fa5056c9 100644 (file)
@@ -169,11 +169,14 @@ class Root:
         """
         targets = []
         mgr_daemons = self.mgr.cache.get_daemons_by_service('mgr')
-        host = service_registry.get_service('mgr').get_active_daemon(mgr_daemons).hostname or ''
-        fqdn = self.mgr.get_fqdn(host)
-        port = self.mgr.get_module_option_ex(
-            'prometheus', 'server_port', PrometheusService.DEFAULT_MGR_PROMETHEUS_PORT)
-        targets.append(f'{fqdn}:{port}')
+        dd = service_registry.get_service('mgr').get_active_daemon(mgr_daemons)
+        if dd:
+            assert dd.hostname is not None
+            addr = dd.ip if dd.ip else self.mgr.inventory.get_addr(dd.hostname)
+            port_val = self.mgr.get_module_option_ex(
+                'prometheus', 'server_port', PrometheusService.DEFAULT_MGR_PROMETHEUS_PORT)
+            port: Optional[int] = int(port_val) if port_val is not None else None
+            targets.append(build_url(host=addr, port=port).lstrip('/'))
         return [{"targets": targets, "labels": {}}]
 
     def alertmgr_sd_config(self) -> List[Dict[str, Collection[str]]]:
index ab86a6829dc5890db1968e43975613167203186d..3db1a06d8f2604cc9657a49d3b970f224032e92a 100644 (file)
@@ -1,4 +1,5 @@
 from unittest.mock import MagicMock
+from typing import List
 from cephadm.services.service_discovery import Root
 from cephadm.services.service_registry import service_registry
 
@@ -128,7 +129,10 @@ class FakeMgr:
         return -1, '', 'error'
 
     def get_module_option_ex(self, module, option, default_value):
-        return "9283"
+        # Port value for prometheus server_port (used by prometheus_sd_config).
+        if module == 'prometheus' and option == 'server_port':
+            return 9283
+        return default_value
 
     def daemon_is_self(self, d_type, d_id) -> bool:
         if d_type == 'mgr' and d_id == 'fake_active_mgr':
@@ -152,8 +156,27 @@ class TestServiceDiscovery:
             assert 'labels' in entry
             assert 'targets' in entry
 
-        # check content
-        assert cfg[0]['targets'] == ['node0:9283']
+        # check content - should return IP address instead of hostname
+        assert cfg[0]['targets'] == ['1.2.3.4:9283']
+
+    def test_get_sd_config_prometheus_uses_ip_when_hostname_unresolvable(self, monkeypatch):
+        mgr = FakeMgr()
+
+        def _mgr_daemons_no_ip(service_type: str) -> List[FakeDaemonDescription]:
+            if service_type == 'mgr':
+                return [
+                    FakeDaemonDescription(None, [9922], 'node0', daemon_type='mgr', daemon_id='fake_active_mgr'),
+                    FakeDaemonDescription('1.2.3.5', [9922], 'node1', daemon_type='mgr', daemon_id='fake_standby_mgr'),
+                ]
+            return FakeCache().get_daemons_by_service(service_type)
+
+        monkeypatch.setattr(mgr.cache, 'get_daemons_by_service', _mgr_daemons_no_ip)
+
+        root = Root(mgr)
+        cfg = root.get_sd_config('mgr-prometheus')
+
+        # Expect fallback to inventory IP (FakeInventory returns 1.2.3.4)
+        assert cfg == [{"targets": ["1.2.3.4:9283"], "labels": {}}]
 
     def test_get_sd_config_node_exporter(self):
         mgr = FakeMgr()