git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
cephadm/nvmeof: scrape nvmeof prometheus endpoint
author: Avan Thakkar <athakkar@redhat.com>
Thu, 22 Feb 2024 11:00:06 +0000 (16:30 +0530)
committer: Avan Thakkar <athakkar@redhat.com>
Sat, 24 Feb 2024 16:56:25 +0000 (22:26 +0530)
Fixes: https://tracker.ceph.com/issues/64536
Signed-off-by: Avan Thakkar <athakkar@redhat.com>
src/cephadm/cephadmlib/constants.py
src/pybind/mgr/cephadm/module.py
src/pybind/mgr/cephadm/service_discovery.py
src/pybind/mgr/cephadm/services/monitoring.py
src/pybind/mgr/cephadm/services/nvmeof.py
src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j2
src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2
src/pybind/mgr/cephadm/tests/test_service_discovery.py
src/pybind/mgr/cephadm/tests/test_services.py

index dfa660f48986338e46a656c7c31f80c7837aefc6..119f43b459b6baca36579db4abf5607a9ad7fdfa 100644 (file)
@@ -12,7 +12,7 @@ DEFAULT_ALERT_MANAGER_IMAGE = 'quay.io/prometheus/alertmanager:v0.25.0'
 DEFAULT_GRAFANA_IMAGE = 'quay.io/ceph/ceph-grafana:9.4.12'
 DEFAULT_HAPROXY_IMAGE = 'quay.io/ceph/haproxy:2.3'
 DEFAULT_KEEPALIVED_IMAGE = 'quay.io/ceph/keepalived:2.2.4'
-DEFAULT_NVMEOF_IMAGE = 'quay.io/ceph/nvmeof:latest'
+DEFAULT_NVMEOF_IMAGE = 'quay.io/ceph/nvmeof:1.0.0'
 DEFAULT_SNMP_GATEWAY_IMAGE = 'docker.io/maxwo/snmp-notifier:v1.2.1'
 DEFAULT_ELASTICSEARCH_IMAGE = 'quay.io/omrizeneva/elasticsearch:6.8.23'
 DEFAULT_JAEGER_COLLECTOR_IMAGE = 'quay.io/jaegertracing/jaeger-collector:1.29'
index 87f7024bb2555510a7418f357634559a2bed0593..c43152856a60d2cbd5f266cda3c82ec181cec28d 100644 (file)
@@ -117,7 +117,7 @@ os._exit = os_exit_noop   # type: ignore
 DEFAULT_IMAGE = 'quay.io/ceph/ceph'
 DEFAULT_PROMETHEUS_IMAGE = 'quay.io/prometheus/prometheus:v2.43.0'
 DEFAULT_NODE_EXPORTER_IMAGE = 'quay.io/prometheus/node-exporter:v1.5.0'
-DEFAULT_NVMEOF_IMAGE = 'quay.io/ceph/nvmeof:latest'
+DEFAULT_NVMEOF_IMAGE = 'quay.io/ceph/nvmeof:1.0.0'
 DEFAULT_LOKI_IMAGE = 'docker.io/grafana/loki:2.4.0'
 DEFAULT_PROMTAIL_IMAGE = 'docker.io/grafana/promtail:2.4.0'
 DEFAULT_ALERT_MANAGER_IMAGE = 'quay.io/prometheus/alertmanager:v0.25.0'
index b681cc8e7ff7b4ab40ab5fd902911ef3f9403bb1..2b82f87493ff3dea7f30b986ce8e41c0bb1f5551 100644 (file)
@@ -19,6 +19,7 @@ import secrets
 from cephadm.services.ingress import IngressSpec
 from cephadm.ssl_cert_utils import SSLCerts
 from cephadm.services.cephadmservice import CephExporterService
+from cephadm.services.nvmeof import NvmeofService
 
 if TYPE_CHECKING:
     from cephadm.module import CephadmOrchestrator
@@ -145,6 +146,7 @@ class Root(Server):
 <p><a href='prometheus/sd-config?service=node-exporter'>Node exporter http sd-config</a></p>
 <p><a href='prometheus/sd-config?service=haproxy'>HAProxy http sd-config</a></p>
 <p><a href='prometheus/sd-config?service=ceph-exporter'>Ceph exporter http sd-config</a></p>
+<p><a href='prometheus/sd-config?service=nvmeof'>NVMeoF http sd-config</a></p>
 <p><a href='prometheus/rules'>Prometheus rules</a></p>
 </body>
 </html>'''
@@ -163,6 +165,8 @@ class Root(Server):
             return self.haproxy_sd_config()
         elif service == 'ceph-exporter':
             return self.ceph_exporter_sd_config()
+        elif service == 'nvmeof':
+            return self.nvmeof_sd_config()
         else:
             return []
 
@@ -231,6 +235,19 @@ class Root(Server):
             })
         return srv_entries
 
+    def nvmeof_sd_config(self) -> List[Dict[str, Collection[str]]]:
+        """Return <http_sd_config> compatible prometheus config for nvmeof service."""
+        srv_entries = []
+        for dd in self.mgr.cache.get_daemons_by_type('nvmeof'):
+            assert dd.hostname is not None
+            addr = dd.ip if dd.ip else self.mgr.inventory.get_addr(dd.hostname)
+            port = NvmeofService.PROMETHEUS_PORT
+            srv_entries.append({
+                'targets': [build_url(host=addr, port=port).lstrip('/')],
+                'labels': {'instance': dd.hostname}
+            })
+        return srv_entries
+
     @cherrypy.expose(alias='prometheus/rules')
     def get_prometheus_rules(self) -> str:
         """Return currently configured prometheus rules as Yaml."""
index 114c848608a307a19b8eee94287892e681603d81..d3439c04d04f506d07f5c2a1204d90d434ee5caf 100644 (file)
@@ -402,6 +402,7 @@ class PrometheusService(CephadmService):
         haproxy_sd_url = f'{srv_end_point}service=haproxy' if haproxy_cnt > 0 else None
         mgr_prometheus_sd_url = f'{srv_end_point}service=mgr-prometheus'  # always included
         ceph_exporter_sd_url = f'{srv_end_point}service=ceph-exporter'  # always included
+        nvmeof_sd_url = f'{srv_end_point}service=nvmeof'  # always included
 
         alertmanager_user, alertmanager_password = self.mgr._get_alertmanager_credentials()
         prometheus_user, prometheus_password = self.mgr._get_prometheus_credentials()
@@ -417,7 +418,8 @@ class PrometheusService(CephadmService):
             'node_exporter_sd_url': node_exporter_sd_url,
             'alertmanager_sd_url': alertmanager_sd_url,
             'haproxy_sd_url': haproxy_sd_url,
-            'ceph_exporter_sd_url': ceph_exporter_sd_url
+            'ceph_exporter_sd_url': ceph_exporter_sd_url,
+            'nvmeof_sd_url': nvmeof_sd_url,
         }
 
         web_context = {
index 7d2de75f67f97a4bc745f59ed1201af6812389de..6cd1f4604d312cea1605b8b2faafbeb56b74b160 100644 (file)
@@ -16,6 +16,7 @@ logger = logging.getLogger(__name__)
 
 class NvmeofService(CephService):
     TYPE = 'nvmeof'
+    PROMETHEUS_PORT = 10008
 
     def config(self, spec: NvmeofServiceSpec) -> None:  # type: ignore
         assert self.TYPE == spec.service_type
index 4aa0b90935770b6d5d818bdb1841868ba59ca269..711af0ee724aba9b3a3b97e771cd9939fedb9c44 100644 (file)
@@ -10,6 +10,9 @@ state_update_interval_sec = 5
 min_controller_id = {{ spec.min_controller_id }}
 max_controller_id = {{ spec.max_controller_id }}
 enable_spdk_discovery_controller = {{ spec.enable_spdk_discovery_controller }}
+enable_prometheus_exporter = True
+prometheus_exporter_ssl = False
+prometheus_port = 10008
 
 [ceph]
 pool = {{ spec.pool }}
index b56843994555e944e0e8a52b16b8cd3a5df45cdb..57d2f8a3f4b46ceb3eaa8e638d3a26551e94e3af 100644 (file)
@@ -107,3 +107,23 @@ scrape_configs:
     - url: {{ ceph_exporter_sd_url }}
 {% endif %}
 {% endif %}
+
+{% if nvmeof_sd_url %}
+  - job_name: 'nvmeof'
+{% if secure_monitoring_stack %}
+    honor_labels: true
+    scheme: https
+    tls_config:
+      ca_file: root_cert.pem
+    http_sd_configs:
+    - url: {{ nvmeof_sd_url }}
+      basic_auth:
+        username: {{ service_discovery_username }}
+        password: {{ service_discovery_password }}
+      tls_config:
+        ca_file: root_cert.pem
+{% else %}
+    http_sd_configs:
+    - url: {{ nvmeof_sd_url }}
+{% endif %}
+{% endif %}
index ff98a13885f8097094c5d0b22b7fdb69e953d1ff..687b64553eaaddc8b103186c48d370148022544b 100644 (file)
@@ -19,6 +19,9 @@ class FakeCache:
         if service_type == 'ceph-exporter':
             return [FakeDaemonDescription('1.2.3.4', [9926], 'node0'),
                     FakeDaemonDescription('1.2.3.5', [9926], 'node1')]
+        if service_type == 'nvmeof':
+            return [FakeDaemonDescription('1.2.3.4', [10008], 'node0'),
+                    FakeDaemonDescription('1.2.3.5', [10008], 'node1')]
 
         return [FakeDaemonDescription('1.2.3.4', [9100], 'node0'),
                 FakeDaemonDescription('1.2.3.5', [9200], 'node1')]
@@ -171,6 +174,20 @@ class TestServiceDiscovery:
         # check content
         assert cfg[0]['targets'] == ['1.2.3.4:9926']
 
+    def test_get_sd_config_nvmeof(self):
+        mgr = FakeMgr()
+        root = Root(mgr, 5000, '0.0.0.0')
+        cfg = root.get_sd_config('nvmeof')
+
+        # check response structure
+        assert cfg
+        for entry in cfg:
+            assert 'labels' in entry
+            assert 'targets' in entry
+
+        # check content
+        assert cfg[0]['targets'] == ['1.2.3.4:10008']
+
     def test_get_sd_config_invalid_service(self):
         mgr = FakeMgr()
         root = Root(mgr, 5000, '0.0.0.0')
index dbab022058d424e132d8a8e26f0de1198f129884..6c157ea433633600536c53ca30c113da815de456 100644 (file)
@@ -393,6 +393,9 @@ state_update_interval_sec = 5
 min_controller_id = 1
 max_controller_id = 65519
 enable_spdk_discovery_controller = False
+enable_prometheus_exporter = True
+prometheus_exporter_ssl = False
+prometheus_port = 10008
 
 [ceph]
 pool = {pool}
@@ -716,6 +719,10 @@ class TestMonitoring:
                     honor_labels: true
                     http_sd_configs:
                     - url: http://[::1]:8765/sd/prometheus/sd-config?service=ceph-exporter
+
+                  - job_name: 'nvmeof'
+                    http_sd_configs:
+                    - url: http://[::1]:8765/sd/prometheus/sd-config?service=nvmeof
                 """).lstrip()
 
                 _run_cephadm.assert_called_with(
@@ -872,6 +879,19 @@ class TestMonitoring:
                         password: sd_password
                       tls_config:
                         ca_file: root_cert.pem
+
+                  - job_name: 'nvmeof'
+                    honor_labels: true
+                    scheme: https
+                    tls_config:
+                      ca_file: root_cert.pem
+                    http_sd_configs:
+                    - url: https://[::1]:8765/sd/prometheus/sd-config?service=nvmeof
+                      basic_auth:
+                        username: sd_user
+                        password: sd_password
+                      tls_config:
+                        ca_file: root_cert.pem
                 """).lstrip()
 
                 _run_cephadm.assert_called_with(