From 2f9453b0b5d7b38b39287c5f31302bc082c0b10a Mon Sep 17 00:00:00 2001 From: avanthakkar Date: Wed, 3 Jan 2024 17:22:33 +0530 Subject: [PATCH] mgr/nfs: scrape nfs monitoring endpoint Fixes: https://tracker.ceph.com/issues/62558 Signed-off-by: avanthakkar (cherry picked from commit 5fb45e5fb8330721e03b04d493202c9c845e33b1) Conflicts: src/pybind/mgr/cephadm/service_discovery.py src/pybind/mgr/cephadm/services/monitoring.py src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2 --- src/pybind/mgr/cephadm/service_discovery.py | 17 +++++++++++++++ src/pybind/mgr/cephadm/services/monitoring.py | 4 +++- src/pybind/mgr/cephadm/services/nfs.py | 1 + .../services/prometheus/prometheus.yml.j2 | 21 ++++++++++++++++++- .../cephadm/tests/test_service_discovery.py | 18 ++++++++++++++++ src/pybind/mgr/cephadm/tests/test_services.py | 17 +++++++++++++++ 6 files changed, 76 insertions(+), 2 deletions(-) diff --git a/src/pybind/mgr/cephadm/service_discovery.py b/src/pybind/mgr/cephadm/service_discovery.py index b72570382f43a..9755aceeddb41 100644 --- a/src/pybind/mgr/cephadm/service_discovery.py +++ b/src/pybind/mgr/cephadm/service_discovery.py @@ -13,6 +13,7 @@ import orchestrator # noqa from mgr_module import ServiceInfoT from mgr_util import build_url from typing import Dict, List, TYPE_CHECKING, cast, Collection, Callable, NamedTuple, Optional +from cephadm.services.nfs import NFSService from cephadm.services.monitoring import AlertmanagerService, NodeExporterService, PrometheusService import secrets @@ -144,6 +145,7 @@ class Root(Server):

HAProxy http sd-config

Ceph exporter http sd-config

NVMeoF http sd-config

+

NFS http sd-config

Prometheus rules

''' @@ -164,6 +166,8 @@ class Root(Server): return self.ceph_exporter_sd_config() elif service == 'nvmeof': return self.nvmeof_sd_config() + elif service == 'nfs': + return self.nfs_sd_config() else: return [] @@ -245,6 +249,19 @@ class Root(Server): }) return srv_entries + def nfs_sd_config(self) -> List[Dict[str, Collection[str]]]: + """Return compatible prometheus config for nfs service.""" + srv_entries = [] + for dd in self.mgr.cache.get_daemons_by_type('nfs'): + assert dd.hostname is not None + addr = dd.ip if dd.ip else self.mgr.inventory.get_addr(dd.hostname) + port = NFSService.DEFAULT_EXPORTER_PORT + srv_entries.append({ + 'targets': [build_url(host=addr, port=port).lstrip('/')], + 'labels': {'instance': dd.hostname} + }) + return srv_entries + @cherrypy.expose(alias='prometheus/rules') def get_prometheus_rules(self) -> str: """Return currently configured prometheus rules as Yaml.""" diff --git a/src/pybind/mgr/cephadm/services/monitoring.py b/src/pybind/mgr/cephadm/services/monitoring.py index c12c637c39d17..42245b53d418e 100644 --- a/src/pybind/mgr/cephadm/services/monitoring.py +++ b/src/pybind/mgr/cephadm/services/monitoring.py @@ -447,6 +447,7 @@ class PrometheusService(CephadmService): mgr_prometheus_sd_url = f'{srv_end_point}service=mgr-prometheus' # always included ceph_exporter_sd_url = f'{srv_end_point}service=ceph-exporter' # always included nvmeof_sd_url = f'{srv_end_point}service=nvmeof' # always included + nfs_sd_url = f'{srv_end_point}service=nfs' # always included alertmanager_user, alertmanager_password = self.mgr._get_alertmanager_credentials() prometheus_user, prometheus_password = self.mgr._get_prometheus_credentials() @@ -466,7 +467,8 @@ class PrometheusService(CephadmService): 'ceph_exporter_sd_url': ceph_exporter_sd_url, 'nvmeof_sd_url': nvmeof_sd_url, 'external_prometheus_targets': targets, - 'cluster_fsid': FSID + 'cluster_fsid': FSID, + 'nfs_sd_url': nfs_sd_url } ip_to_bind_to = '' diff --git a/src/pybind/mgr/cephadm/services/nfs.py b/src/pybind/mgr/cephadm/services/nfs.py index f46f65b084bea..fbddccd20bee4 100644 --- a/src/pybind/mgr/cephadm/services/nfs.py +++ b/src/pybind/mgr/cephadm/services/nfs.py @@ -22,6 +22,7 @@ logger = logging.getLogger(__name__) class NFSService(CephService): TYPE = 'nfs' + DEFAULT_EXPORTER_PORT = 9587 def ranked(self) -> bool: return True diff --git a/src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2 b/src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2 index faccc8f6de26c..83f827d9627eb 100644 --- a/src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2 +++ b/src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2 @@ -156,6 +156,26 @@ scrape_configs: {% endif %} {% endif %} +{% if nfs_sd_url %} + - job_name: 'nfs' +{% if secure_monitoring_stack %} + honor_labels: true + scheme: https + tls_config: + ca_file: root_cert.pem + http_sd_configs: + - url: {{ nfs_sd_url }} + basic_auth: + username: {{ service_discovery_username }} + password: {{ service_discovery_password }} + tls_config: + ca_file: root_cert.pem +{% else %} + http_sd_configs: + - url: {{ nfs_sd_url }} +{% endif %} +{% endif %} + {% if not secure_monitoring_stack %} - job_name: 'federate' scrape_interval: 15s @@ -170,4 +190,3 @@ scrape_configs: static_configs: - targets: {{ external_prometheus_targets }} {% endif %} - diff --git a/src/pybind/mgr/cephadm/tests/test_service_discovery.py b/src/pybind/mgr/cephadm/tests/test_service_discovery.py index 687b64553eaad..159431b3b889a 100644 --- a/src/pybind/mgr/cephadm/tests/test_service_discovery.py +++ b/src/pybind/mgr/cephadm/tests/test_service_discovery.py @@ -23,6 +23,10 @@ class FakeCache: return [FakeDaemonDescription('1.2.3.4', [10008], 'node0'), FakeDaemonDescription('1.2.3.5', [10008], 'node1')] + if service_type == 'nfs': + return [FakeDaemonDescription('1.2.3.4', [9587], 'node0'), + FakeDaemonDescription('1.2.3.5', [9587], 'node1')] + return [FakeDaemonDescription('1.2.3.4', [9100], 'node0'), FakeDaemonDescription('1.2.3.5', [9200], 'node1')] @@ -188,6 +192,20 @@ class TestServiceDiscovery: # check content assert cfg[0]['targets'] == ['1.2.3.4:10008'] + def test_get_sd_config_nfs(self): + mgr = FakeMgr() + root = Root(mgr, 5000, '0.0.0.0') + cfg = root.get_sd_config('nfs') + + # check response structure + assert cfg + for entry in cfg: + assert 'labels' in entry + assert 'targets' in entry + + # check content + assert cfg[0]['targets'] == ['1.2.3.4:9587'] + def test_get_sd_config_invalid_service(self): mgr = FakeMgr() root = Root(mgr, 5000, '0.0.0.0') diff --git a/src/pybind/mgr/cephadm/tests/test_services.py b/src/pybind/mgr/cephadm/tests/test_services.py index 617d12bc8b620..aa2172b13833e 100644 --- a/src/pybind/mgr/cephadm/tests/test_services.py +++ b/src/pybind/mgr/cephadm/tests/test_services.py @@ -783,6 +783,10 @@ class TestMonitoring: http_sd_configs: - url: http://[::1]:8765/sd/prometheus/sd-config?service=nvmeof + - job_name: 'nfs' + http_sd_configs: + - url: http://[::1]:8765/sd/prometheus/sd-config?service=nfs + - job_name: 'federate' scrape_interval: 15s honor_labels: true @@ -973,6 +977,19 @@ class TestMonitoring: tls_config: ca_file: root_cert.pem + - job_name: 'nfs' + honor_labels: true + scheme: https + tls_config: + ca_file: root_cert.pem + http_sd_configs: + - url: https://[::1]:8765/sd/prometheus/sd-config?service=nfs + basic_auth: + username: sd_user + password: sd_password + tls_config: + ca_file: root_cert.pem + """).lstrip() _run_cephadm.assert_called_with( -- 2.39.5