]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mgr/prometheus: fix metrics service not coming up 64385/head
authorNizamudeen A <nia@redhat.com>
Tue, 8 Jul 2025 13:37:35 +0000 (19:07 +0530)
committerNizamudeen A <nia@redhat.com>
Thu, 10 Jul 2025 08:57:32 +0000 (14:27 +0530)
https://github.com/ceph/ceph/pull/61468 unintentionally broke the HTTP
metrics service when it removed the code that starts the metrics service.
Add that code back.

Also add a basic test to catch such regressions.

Regression from https://github.com/ceph/ceph/pull/61468/commits/64f590cc8f03c9aab909c3ea5b9f53271ba3c15b#diff-031e09c4297d84a407cf55f8981d38764efc3c37e9827e12e638521f69284e1f

Fixes: https://tracker.ceph.com/issues/72012
Signed-off-by: Nizamudeen A <nia@redhat.com>
qa/suites/orch/cephadm/workunits/task/test_monitoring_stack_basic.yaml
src/pybind/mgr/prometheus/module.py

index 515293ea83a710966f576f52613d30af29bfd504..99fe1e91365add0335a76b289aa74772caaa236c 100644 (file)
@@ -64,3 +64,7 @@ tasks:
         curl -s http://${ALERTM_IP}:9093/api/v2/status
         curl -s http://${ALERTM_IP}:9093/api/v2/alerts
         curl -s http://${ALERTM_IP}:9093/api/v2/alerts | jq -e '.[] | select(.labels | .alertname == "CephMonDown") | .status | .state == "active"'
+        # check prometheus metrics endpoint is not empty and make sure we can get metrics
+        METRICS_URL=$(ceph mgr services | jq -r .prometheus)
+        [ -n "$METRICS_URL" ] || exit 1
+        curl -s "${METRICS_URL}metrics" | grep -q '^ceph_health_status'
index 85798954632555d2af191f3a3b1bc5a710df2fee..5d7eec5d5167ddaa1adf631299af9573e5061ebd 100644 (file)
@@ -1773,6 +1773,9 @@ class Module(MgrModule, OrchestratorClientMixin):
                 self.log.exception(f'Failed to setup cephadm based secure monitoring stack: {e}\n',
                                    'Falling back to default configuration')
 
+        # On any error, fall back to plain HTTP mode
+        self.setup_default_config(server_addr, server_port)
+
     def setup_default_config(self, server_addr: str, server_port: int) -> None:
         cherrypy.config.update({
             'server.socket_host': server_addr,