git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/nfs: scrape nfs monitoring endpoint 61719/head
author avanthakkar <avanjohn@gmail.com>
Wed, 3 Jan 2024 11:52:33 +0000 (17:22 +0530)
committer Adam King <adking@redhat.com>
Wed, 12 Feb 2025 15:54:47 +0000 (10:54 -0500)
Fixes: https://tracker.ceph.com/issues/62558
Signed-off-by: avanthakkar <avanjohn@gmail.com>
(cherry picked from commit 5fb45e5fb8330721e03b04d493202c9c845e33b1)

Conflicts:
src/pybind/mgr/cephadm/service_discovery.py
src/pybind/mgr/cephadm/services/monitoring.py
src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2

src/pybind/mgr/cephadm/service_discovery.py
src/pybind/mgr/cephadm/services/monitoring.py
src/pybind/mgr/cephadm/services/nfs.py
src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2
src/pybind/mgr/cephadm/tests/test_service_discovery.py
src/pybind/mgr/cephadm/tests/test_services.py

index b3b7b5499eaf4823f9606368543343b42e05466b..3601435afa90e40c5da7a809d6375d24d365ef5c 100644 (file)
@@ -13,6 +13,7 @@ import orchestrator  # noqa
 from mgr_module import ServiceInfoT
 from mgr_util import build_url
 from typing import Dict, List, TYPE_CHECKING, cast, Collection, Callable, NamedTuple, Optional
+from cephadm.services.nfs import NFSService
 from cephadm.services.monitoring import AlertmanagerService, NodeExporterService, PrometheusService
 import secrets
 
@@ -147,6 +148,7 @@ class Root(Server):
 <p><a href='prometheus/sd-config?service=haproxy'>HAProxy http sd-config</a></p>
 <p><a href='prometheus/sd-config?service=ceph-exporter'>Ceph exporter http sd-config</a></p>
 <p><a href='prometheus/sd-config?service=nvmeof'>NVMeoF http sd-config</a></p>
+<p><a href='prometheus/sd-config?service=nfs'>NFS http sd-config</a></p>
 <p><a href='prometheus/rules'>Prometheus rules</a></p>
 </body>
 </html>'''
@@ -167,6 +169,8 @@ class Root(Server):
             return self.ceph_exporter_sd_config()
         elif service == 'nvmeof':
             return self.nvmeof_sd_config()
+        elif service == 'nfs':
+            return self.nfs_sd_config()
         else:
             return []
 
@@ -248,6 +252,19 @@ class Root(Server):
             })
         return srv_entries
 
+    def nfs_sd_config(self) -> List[Dict[str, Collection[str]]]:
+        """Return <http_sd_config> compatible prometheus config for nfs service."""
+        srv_entries = []
+        for dd in self.mgr.cache.get_daemons_by_type('nfs'):
+            assert dd.hostname is not None
+            addr = dd.ip if dd.ip else self.mgr.inventory.get_addr(dd.hostname)
+            port = NFSService.DEFAULT_EXPORTER_PORT
+            srv_entries.append({
+                'targets': [build_url(host=addr, port=port).lstrip('/')],
+                'labels': {'instance': dd.hostname}
+            })
+        return srv_entries
+
     @cherrypy.expose(alias='prometheus/rules')
     def get_prometheus_rules(self) -> str:
         """Return currently configured prometheus rules as Yaml."""
index 0576d4652d0399cb54e5c25d6f56b8163fe74dc2..c9fa53785971b28a435463ee8f888e06309a44e3 100644 (file)
@@ -419,6 +419,7 @@ class PrometheusService(CephadmService):
         mgr_prometheus_sd_url = f'{srv_end_point}service=mgr-prometheus'  # always included
         ceph_exporter_sd_url = f'{srv_end_point}service=ceph-exporter'  # always included
         nvmeof_sd_url = f'{srv_end_point}service=nvmeof'  # always included
+        nfs_sd_url = f'{srv_end_point}service=nfs'  # always included
 
         alertmanager_user, alertmanager_password = self.mgr._get_alertmanager_credentials()
         prometheus_user, prometheus_password = self.mgr._get_prometheus_credentials()
@@ -438,7 +439,8 @@ class PrometheusService(CephadmService):
             'ceph_exporter_sd_url': ceph_exporter_sd_url,
             'nvmeof_sd_url': nvmeof_sd_url,
             'external_prometheus_targets': targets,
-            'cluster_fsid': FSID
+            'cluster_fsid': FSID,
+            'nfs_sd_url': nfs_sd_url
         }
 
         ip_to_bind_to = ''
index f46f65b084beaeb3da97ac6665ba34f99da2e7ca..fbddccd20bee4b6d2da72fb73e9a69bd90ee8a5a 100644 (file)
@@ -22,6 +22,7 @@ logger = logging.getLogger(__name__)
 
 class NFSService(CephService):
     TYPE = 'nfs'
+    DEFAULT_EXPORTER_PORT = 9587
 
     def ranked(self) -> bool:
         return True
index faccc8f6de26cf2b1d0427328c02d7c06ae588ec..83f827d9627eb7037871b99d0eb4a428b960f8ce 100644 (file)
@@ -156,6 +156,26 @@ scrape_configs:
 {% endif %}
 {% endif %}
 
+{% if nfs_sd_url %}
+  - job_name: 'nfs'
+{% if secure_monitoring_stack %}
+    honor_labels: true
+    scheme: https
+    tls_config:
+      ca_file: root_cert.pem
+    http_sd_configs:
+    - url: {{ nfs_sd_url }}
+      basic_auth:
+        username: {{ service_discovery_username }}
+        password: {{ service_discovery_password }}
+      tls_config:
+        ca_file: root_cert.pem
+{% else %}
+    http_sd_configs:
+    - url: {{ nfs_sd_url }}
+{% endif %}
+{% endif %}
+
 {% if not secure_monitoring_stack %}
   - job_name: 'federate'
     scrape_interval: 15s
@@ -170,4 +190,3 @@ scrape_configs:
     static_configs:
     - targets: {{ external_prometheus_targets }}
 {% endif %}
-
index 687b64553eaaddc8b103186c48d370148022544b..159431b3b889ab9f03f7f81808175b2a0544dce7 100644 (file)
@@ -23,6 +23,10 @@ class FakeCache:
             return [FakeDaemonDescription('1.2.3.4', [10008], 'node0'),
                     FakeDaemonDescription('1.2.3.5', [10008], 'node1')]
 
+        if service_type == 'nfs':
+            return [FakeDaemonDescription('1.2.3.4', [9587], 'node0'),
+                    FakeDaemonDescription('1.2.3.5', [9587], 'node1')]
+
         return [FakeDaemonDescription('1.2.3.4', [9100], 'node0'),
                 FakeDaemonDescription('1.2.3.5', [9200], 'node1')]
 
@@ -188,6 +192,20 @@ class TestServiceDiscovery:
         # check content
         assert cfg[0]['targets'] == ['1.2.3.4:10008']
 
+    def test_get_sd_config_nfs(self):
+        mgr = FakeMgr()
+        root = Root(mgr, 5000, '0.0.0.0')
+        cfg = root.get_sd_config('nfs')
+
+        # check response structure
+        assert cfg
+        for entry in cfg:
+            assert 'labels' in entry
+            assert 'targets' in entry
+
+        # check content
+        assert cfg[0]['targets'] == ['1.2.3.4:9587']
+
     def test_get_sd_config_invalid_service(self):
         mgr = FakeMgr()
         root = Root(mgr, 5000, '0.0.0.0')
index 71776a8e16a93720af2fdea2c3e856626869e2b6..78a9a401fd24d9c8c3e3893b1228eb399f5c4e9a 100644 (file)
@@ -731,6 +731,10 @@ class TestMonitoring:
                     http_sd_configs:
                     - url: http://[::1]:8765/sd/prometheus/sd-config?service=nvmeof
 
+                  - job_name: 'nfs'
+                    http_sd_configs:
+                    - url: http://[::1]:8765/sd/prometheus/sd-config?service=nfs
+
                   - job_name: 'federate'
                     scrape_interval: 15s
                     honor_labels: true
@@ -920,6 +924,19 @@ class TestMonitoring:
                       tls_config:
                         ca_file: root_cert.pem
 
+                  - job_name: 'nfs'
+                    honor_labels: true
+                    scheme: https
+                    tls_config:
+                      ca_file: root_cert.pem
+                    http_sd_configs:
+                    - url: https://[::1]:8765/sd/prometheus/sd-config?service=nfs
+                      basic_auth:
+                        username: sd_user
+                        password: sd_password
+                      tls_config:
+                        ca_file: root_cert.pem
+
                 """).lstrip()
 
                 _run_cephadm.assert_called_with(