git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
cephadm/nvmeof: scrape nvmeof prometheus endpoint 56108/head
author: Avan Thakkar <athakkar@redhat.com>
Thu, 22 Feb 2024 11:00:06 +0000 (16:30 +0530)
committer: Adam King <adking@redhat.com>
Sun, 10 Mar 2024 21:24:56 +0000 (17:24 -0400)
Fixes: https://tracker.ceph.com/issues/64536
Signed-off-by: Avan Thakkar <athakkar@redhat.com>
(cherry picked from commit 93ec6284fb3002b4778c4e54972ff1d864060922)

Conflicts:
src/cephadm/cephadmlib/constants.py
src/pybind/mgr/cephadm/module.py
src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j2
src/pybind/mgr/cephadm/tests/test_services.py

src/cephadm/cephadm.py
src/pybind/mgr/cephadm/module.py
src/pybind/mgr/cephadm/service_discovery.py
src/pybind/mgr/cephadm/services/monitoring.py
src/pybind/mgr/cephadm/services/nvmeof.py
src/pybind/mgr/cephadm/templates/services/nvmeof/ceph-nvmeof.conf.j2
src/pybind/mgr/cephadm/templates/services/prometheus/prometheus.yml.j2
src/pybind/mgr/cephadm/tests/test_service_discovery.py
src/pybind/mgr/cephadm/tests/test_services.py

index 99e0ac740a0eb76512fb018b98fbcbd9cd4f08d6..6ee73eb66a01def284274a7b80fd05ee762a8bd4 100755 (executable)
@@ -56,7 +56,7 @@ DEFAULT_ALERT_MANAGER_IMAGE = 'quay.io/prometheus/alertmanager:v0.25.0'
 DEFAULT_GRAFANA_IMAGE = 'quay.io/ceph/ceph-grafana:9.4.7'
 DEFAULT_HAPROXY_IMAGE = 'quay.io/ceph/haproxy:2.3'
 DEFAULT_KEEPALIVED_IMAGE = 'quay.io/ceph/keepalived:2.2.4'
-DEFAULT_NVMEOF_IMAGE = 'quay.io/ceph/nvmeof:0.0.1'
+DEFAULT_NVMEOF_IMAGE = 'quay.io/ceph/nvmeof:1.0.0'
 DEFAULT_SNMP_GATEWAY_IMAGE = 'docker.io/maxwo/snmp-notifier:v1.2.1'
 DEFAULT_ELASTICSEARCH_IMAGE = 'quay.io/omrizeneva/elasticsearch:6.8.23'
 DEFAULT_JAEGER_COLLECTOR_IMAGE = 'quay.io/jaegertracing/jaeger-collector:1.29'
index b59cf6687f9f46df331ebdb051deef25c7b459d6..796690ca67d6c9377e49f87fe73693d8fd62cad6 100644 (file)
@@ -109,7 +109,7 @@ os._exit = os_exit_noop   # type: ignore
 DEFAULT_IMAGE = 'quay.io/ceph/ceph'  # DO NOT ADD TAG TO THIS
 DEFAULT_PROMETHEUS_IMAGE = 'quay.io/prometheus/prometheus:v2.43.0'
 DEFAULT_NODE_EXPORTER_IMAGE = 'quay.io/prometheus/node-exporter:v1.5.0'
-DEFAULT_NVMEOF_IMAGE = 'quay.io/ceph/nvmeof:0.0.2'
+DEFAULT_NVMEOF_IMAGE = 'quay.io/ceph/nvmeof:1.0.0'
 DEFAULT_LOKI_IMAGE = 'docker.io/grafana/loki:2.4.0'
 DEFAULT_PROMTAIL_IMAGE = 'docker.io/grafana/promtail:2.4.0'
 DEFAULT_ALERT_MANAGER_IMAGE = 'quay.io/prometheus/alertmanager:v0.25.0'
index ddc0574e2b12c4ae8cd36bd7cc8114b7d4cce986..b3b7b5499eaf4823f9606368543343b42e05466b 100644 (file)
@@ -19,6 +19,7 @@ import secrets
 from cephadm.services.ingress import IngressSpec
 from cephadm.ssl_cert_utils import SSLCerts
 from cephadm.services.cephadmservice import CephExporterService
+from cephadm.services.nvmeof import NvmeofService
 
 if TYPE_CHECKING:
     from cephadm.module import CephadmOrchestrator
@@ -145,6 +146,7 @@ class Root(Server):
 <p><a href='prometheus/sd-config?service=node-exporter'>Node exporter http sd-config</a></p>
 <p><a href='prometheus/sd-config?service=haproxy'>HAProxy http sd-config</a></p>
 <p><a href='prometheus/sd-config?service=ceph-exporter'>Ceph exporter http sd-config</a></p>
+<p><a href='prometheus/sd-config?service=nvmeof'>NVMeoF http sd-config</a></p>
 <p><a href='prometheus/rules'>Prometheus rules</a></p>
 </body>
 </html>'''
@@ -163,6 +165,8 @@ class Root(Server):
             return self.haproxy_sd_config()
         elif service == 'ceph-exporter':
             return self.ceph_exporter_sd_config()
+        elif service == 'nvmeof':
+            return self.nvmeof_sd_config()
         else:
             return []
 
@@ -231,6 +235,19 @@ class Root(Server):
             })
         return srv_entries
 
+    def nvmeof_sd_config(self) -> List[Dict[str, Collection[str]]]:
+        """Return <http_sd_config> compatible prometheus config for nvmeof service."""
+        srv_entries = []
+        for dd in self.mgr.cache.get_daemons_by_type('nvmeof'):
+            assert dd.hostname is not None
+            addr = dd.ip if dd.ip else self.mgr.inventory.get_addr(dd.hostname)
+            port = NvmeofService.PROMETHEUS_PORT
+            srv_entries.append({
+                'targets': [build_url(host=addr, port=port).lstrip('/')],
+                'labels': {'instance': dd.hostname}
+            })
+        return srv_entries
+
     @cherrypy.expose(alias='prometheus/rules')
     def get_prometheus_rules(self) -> str:
         """Return currently configured prometheus rules as Yaml."""
index 114c848608a307a19b8eee94287892e681603d81..d3439c04d04f506d07f5c2a1204d90d434ee5caf 100644 (file)
@@ -402,6 +402,7 @@ class PrometheusService(CephadmService):
         haproxy_sd_url = f'{srv_end_point}service=haproxy' if haproxy_cnt > 0 else None
         mgr_prometheus_sd_url = f'{srv_end_point}service=mgr-prometheus'  # always included
         ceph_exporter_sd_url = f'{srv_end_point}service=ceph-exporter'  # always included
+        nvmeof_sd_url = f'{srv_end_point}service=nvmeof'  # always included
 
         alertmanager_user, alertmanager_password = self.mgr._get_alertmanager_credentials()
         prometheus_user, prometheus_password = self.mgr._get_prometheus_credentials()
@@ -417,7 +418,8 @@ class PrometheusService(CephadmService):
             'node_exporter_sd_url': node_exporter_sd_url,
             'alertmanager_sd_url': alertmanager_sd_url,
             'haproxy_sd_url': haproxy_sd_url,
-            'ceph_exporter_sd_url': ceph_exporter_sd_url
+            'ceph_exporter_sd_url': ceph_exporter_sd_url,
+            'nvmeof_sd_url': nvmeof_sd_url,
         }
 
         web_context = {
index 7d2dd16cf0d6e3228e511e8f06c11c31d6b6fb62..5f28273d40caceab5344253f68a405a919eae86f 100644 (file)
@@ -15,6 +15,7 @@ logger = logging.getLogger(__name__)
 
 class NvmeofService(CephService):
     TYPE = 'nvmeof'
+    PROMETHEUS_PORT = 10008
 
     def config(self, spec: NvmeofServiceSpec) -> None:  # type: ignore
         assert self.TYPE == spec.service_type
index 69b8332cde391540ef1ad9500740de1d621ae92f..17290f5041dc85fa856576bf44b66f3f7bab5657 100644 (file)
@@ -7,6 +7,9 @@ port = {{ port }}
 enable_auth = {{ spec.enable_auth }}
 state_update_notify = True
 state_update_interval_sec = 5
+enable_prometheus_exporter = True
+prometheus_exporter_ssl = False
+prometheus_port = 10008
 
 [ceph]
 pool = {{ spec.pool }}
index b56843994555e944e0e8a52b16b8cd3a5df45cdb..57d2f8a3f4b46ceb3eaa8e638d3a26551e94e3af 100644 (file)
@@ -107,3 +107,23 @@ scrape_configs:
     - url: {{ ceph_exporter_sd_url }}
 {% endif %}
 {% endif %}
+
+{% if nvmeof_sd_url %}
+  - job_name: 'nvmeof'
+{% if secure_monitoring_stack %}
+    honor_labels: true
+    scheme: https
+    tls_config:
+      ca_file: root_cert.pem
+    http_sd_configs:
+    - url: {{ nvmeof_sd_url }}
+      basic_auth:
+        username: {{ service_discovery_username }}
+        password: {{ service_discovery_password }}
+      tls_config:
+        ca_file: root_cert.pem
+{% else %}
+    http_sd_configs:
+    - url: {{ nvmeof_sd_url }}
+{% endif %}
+{% endif %}
index ff98a13885f8097094c5d0b22b7fdb69e953d1ff..687b64553eaaddc8b103186c48d370148022544b 100644 (file)
@@ -19,6 +19,9 @@ class FakeCache:
         if service_type == 'ceph-exporter':
             return [FakeDaemonDescription('1.2.3.4', [9926], 'node0'),
                     FakeDaemonDescription('1.2.3.5', [9926], 'node1')]
+        if service_type == 'nvmeof':
+            return [FakeDaemonDescription('1.2.3.4', [10008], 'node0'),
+                    FakeDaemonDescription('1.2.3.5', [10008], 'node1')]
 
         return [FakeDaemonDescription('1.2.3.4', [9100], 'node0'),
                 FakeDaemonDescription('1.2.3.5', [9200], 'node1')]
@@ -171,6 +174,20 @@ class TestServiceDiscovery:
         # check content
         assert cfg[0]['targets'] == ['1.2.3.4:9926']
 
+    def test_get_sd_config_nvmeof(self):
+        mgr = FakeMgr()
+        root = Root(mgr, 5000, '0.0.0.0')
+        cfg = root.get_sd_config('nvmeof')
+
+        # check response structure
+        assert cfg
+        for entry in cfg:
+            assert 'labels' in entry
+            assert 'targets' in entry
+
+        # check content
+        assert cfg[0]['targets'] == ['1.2.3.4:10008']
+
     def test_get_sd_config_invalid_service(self):
         mgr = FakeMgr()
         root = Root(mgr, 5000, '0.0.0.0')
index 2300b288d2951c1485ecb388a01d09e3e79d7856..6bafe518f2e8c42da0765e7c36cc39c8a32570ef 100644 (file)
@@ -376,6 +376,9 @@ port = {default_port}
 enable_auth = False
 state_update_notify = True
 state_update_interval_sec = 5
+enable_prometheus_exporter = True
+prometheus_exporter_ssl = False
+prometheus_port = 10008
 
 [ceph]
 pool = {pool}
@@ -699,6 +702,10 @@ class TestMonitoring:
                     honor_labels: true
                     http_sd_configs:
                     - url: http://[::1]:8765/sd/prometheus/sd-config?service=ceph-exporter
+
+                  - job_name: 'nvmeof'
+                    http_sd_configs:
+                    - url: http://[::1]:8765/sd/prometheus/sd-config?service=nvmeof
                 """).lstrip()
 
                 _run_cephadm.assert_called_with(
@@ -855,6 +862,19 @@ class TestMonitoring:
                         password: sd_password
                       tls_config:
                         ca_file: root_cert.pem
+
+                  - job_name: 'nvmeof'
+                    honor_labels: true
+                    scheme: https
+                    tls_config:
+                      ca_file: root_cert.pem
+                    http_sd_configs:
+                    - url: https://[::1]:8765/sd/prometheus/sd-config?service=nvmeof
+                      basic_auth:
+                        username: sd_user
+                        password: sd_password
+                      tls_config:
+                        ca_file: root_cert.pem
                 """).lstrip()
 
                 _run_cephadm.assert_called_with(