From: avanthakkar Date: Wed, 3 Jan 2024 11:52:33 +0000 (+0530) Subject: mgr/nfs: scrape nfs monitoring endpoint X-Git-Tag: v18.2.5~76^2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=33411cac89d46daa750ffe63f2e29130961797b2;p=ceph.git mgr/nfs: scrape nfs monitoring endpoint Fixes: https://tracker.ceph.com/issues/62558 Signed-off-by: avanthakkar (cherry picked from commit 5fb45e5fb8330721e03b04d493202c9c845e33b1) Conflicts: src/pybind/mgr/cephadm/service_discovery.py src/pybind/mgr/cephadm/services/monitoring.py src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2 --- diff --git a/src/pybind/mgr/cephadm/service_discovery.py b/src/pybind/mgr/cephadm/service_discovery.py index b3b7b5499eaf4..3601435afa90e 100644 --- a/src/pybind/mgr/cephadm/service_discovery.py +++ b/src/pybind/mgr/cephadm/service_discovery.py @@ -13,6 +13,7 @@ import orchestrator # noqa from mgr_module import ServiceInfoT from mgr_util import build_url from typing import Dict, List, TYPE_CHECKING, cast, Collection, Callable, NamedTuple, Optional +from cephadm.services.nfs import NFSService from cephadm.services.monitoring import AlertmanagerService, NodeExporterService, PrometheusService import secrets @@ -147,6 +148,7 @@ class Root(Server):

HAProxy http sd-config

Ceph exporter http sd-config

NVMeoF http sd-config

+NFS http sd-config

Prometheus rules

''' @@ -167,6 +169,8 @@ class Root(Server): return self.ceph_exporter_sd_config() elif service == 'nvmeof': return self.nvmeof_sd_config() + elif service == 'nfs': + return self.nfs_sd_config() else: return [] @@ -248,6 +252,19 @@ class Root(Server): }) return srv_entries + def nfs_sd_config(self) -> List[Dict[str, Collection[str]]]: + """Return compatible prometheus config for nfs service.""" + srv_entries = [] + for dd in self.mgr.cache.get_daemons_by_type('nfs'): + assert dd.hostname is not None + addr = dd.ip if dd.ip else self.mgr.inventory.get_addr(dd.hostname) + port = NFSService.DEFAULT_EXPORTER_PORT + srv_entries.append({ + 'targets': [build_url(host=addr, port=port).lstrip('/')], + 'labels': {'instance': dd.hostname} + }) + return srv_entries + @cherrypy.expose(alias='prometheus/rules') def get_prometheus_rules(self) -> str: """Return currently configured prometheus rules as Yaml.""" diff --git a/src/pybind/mgr/cephadm/services/monitoring.py b/src/pybind/mgr/cephadm/services/monitoring.py index 0576d4652d039..c9fa53785971b 100644 --- a/src/pybind/mgr/cephadm/services/monitoring.py +++ b/src/pybind/mgr/cephadm/services/monitoring.py @@ -419,6 +419,7 @@ class PrometheusService(CephadmService): mgr_prometheus_sd_url = f'{srv_end_point}service=mgr-prometheus' # always included ceph_exporter_sd_url = f'{srv_end_point}service=ceph-exporter' # always included nvmeof_sd_url = f'{srv_end_point}service=nvmeof' # always included + nfs_sd_url = f'{srv_end_point}service=nfs' # always included alertmanager_user, alertmanager_password = self.mgr._get_alertmanager_credentials() prometheus_user, prometheus_password = self.mgr._get_prometheus_credentials() @@ -438,7 +439,8 @@ class PrometheusService(CephadmService): 'ceph_exporter_sd_url': ceph_exporter_sd_url, 'nvmeof_sd_url': nvmeof_sd_url, 'external_prometheus_targets': targets, - 'cluster_fsid': FSID + 'cluster_fsid': FSID, + 'nfs_sd_url': nfs_sd_url } ip_to_bind_to = '' diff --git 
a/src/pybind/mgr/cephadm/services/nfs.py b/src/pybind/mgr/cephadm/services/nfs.py index f46f65b084bea..fbddccd20bee4 100644 --- a/src/pybind/mgr/cephadm/services/nfs.py +++ b/src/pybind/mgr/cephadm/services/nfs.py @@ -22,6 +22,7 @@ logger = logging.getLogger(__name__) class NFSService(CephService): TYPE = 'nfs' + DEFAULT_EXPORTER_PORT = 9587 def ranked(self) -> bool: return True diff --git a/src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2 b/src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2 index faccc8f6de26c..83f827d9627eb 100644 --- a/src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2 +++ b/src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2 @@ -156,6 +156,26 @@ scrape_configs: {% endif %} {% endif %} +{% if nfs_sd_url %} + - job_name: 'nfs' +{% if secure_monitoring_stack %} + honor_labels: true + scheme: https + tls_config: + ca_file: root_cert.pem + http_sd_configs: + - url: {{ nfs_sd_url }} + basic_auth: + username: {{ service_discovery_username }} + password: {{ service_discovery_password }} + tls_config: + ca_file: root_cert.pem +{% else %} + http_sd_configs: + - url: {{ nfs_sd_url }} +{% endif %} +{% endif %} + {% if not secure_monitoring_stack %} - job_name: 'federate' scrape_interval: 15s @@ -170,4 +190,3 @@ scrape_configs: static_configs: - targets: {{ external_prometheus_targets }} {% endif %} - diff --git a/src/pybind/mgr/cephadm/tests/test_service_discovery.py b/src/pybind/mgr/cephadm/tests/test_service_discovery.py index 687b64553eaad..159431b3b889a 100644 --- a/src/pybind/mgr/cephadm/tests/test_service_discovery.py +++ b/src/pybind/mgr/cephadm/tests/test_service_discovery.py @@ -23,6 +23,10 @@ class FakeCache: return [FakeDaemonDescription('1.2.3.4', [10008], 'node0'), FakeDaemonDescription('1.2.3.5', [10008], 'node1')] + if service_type == 'nfs': + return [FakeDaemonDescription('1.2.3.4', [9587], 'node0'), + FakeDaemonDescription('1.2.3.5', [9587], 'node1')] + return 
[FakeDaemonDescription('1.2.3.4', [9100], 'node0'), FakeDaemonDescription('1.2.3.5', [9200], 'node1')] @@ -188,6 +192,20 @@ class TestServiceDiscovery: # check content assert cfg[0]['targets'] == ['1.2.3.4:10008'] + def test_get_sd_config_nfs(self): + mgr = FakeMgr() + root = Root(mgr, 5000, '0.0.0.0') + cfg = root.get_sd_config('nfs') + + # check response structure + assert cfg + for entry in cfg: + assert 'labels' in entry + assert 'targets' in entry + + # check content + assert cfg[0]['targets'] == ['1.2.3.4:9587'] + def test_get_sd_config_invalid_service(self): mgr = FakeMgr() root = Root(mgr, 5000, '0.0.0.0') diff --git a/src/pybind/mgr/cephadm/tests/test_services.py b/src/pybind/mgr/cephadm/tests/test_services.py index 71776a8e16a93..78a9a401fd24d 100644 --- a/src/pybind/mgr/cephadm/tests/test_services.py +++ b/src/pybind/mgr/cephadm/tests/test_services.py @@ -731,6 +731,10 @@ class TestMonitoring: http_sd_configs: - url: http://[::1]:8765/sd/prometheus/sd-config?service=nvmeof + - job_name: 'nfs' + http_sd_configs: + - url: http://[::1]:8765/sd/prometheus/sd-config?service=nfs + - job_name: 'federate' scrape_interval: 15s honor_labels: true @@ -920,6 +924,19 @@ class TestMonitoring: tls_config: ca_file: root_cert.pem + - job_name: 'nfs' + honor_labels: true + scheme: https + tls_config: + ca_file: root_cert.pem + http_sd_configs: + - url: https://[::1]:8765/sd/prometheus/sd-config?service=nfs + basic_auth: + username: sd_user + password: sd_password + tls_config: + ca_file: root_cert.pem + """).lstrip() _run_cephadm.assert_called_with(