From 27f01a6a68c1871bfaf882c05fe7ddc5786e28f4 Mon Sep 17 00:00:00 2001 From: David Galloway Date: Fri, 27 Mar 2026 12:22:44 -0400 Subject: [PATCH] mgr/rook: Explicitly enable prometheus module Signed-off-by: David Galloway --- .../mgr/rook/ci/scripts/bootstrap-rook-cluster.sh | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/pybind/mgr/rook/ci/scripts/bootstrap-rook-cluster.sh b/src/pybind/mgr/rook/ci/scripts/bootstrap-rook-cluster.sh index a2fab9608e31..5b0dfb18d100 100755 --- a/src/pybind/mgr/rook/ci/scripts/bootstrap-rook-cluster.sh +++ b/src/pybind/mgr/rook/ci/scripts/bootstrap-rook-cluster.sh @@ -207,10 +207,18 @@ enable_monitoring() { $KUBECTL wait --for=condition=ready pod -l app.kubernetes.io/name=prometheus \ -n rook-ceph --timeout=120s - # Verify ceph mgr prometheus module is actually serving on port 9283 + # If the ceph mgr prometheus module is not yet serving on port 9283, enable + # it explicitly. We check first to avoid restarting an already-running + # instance, which would trigger a port 9283 conflict. local mgr_pod mgr_pod=$($KUBECTL -n rook-ceph get pods -l app=rook-ceph-mgr \ -o jsonpath='{.items[0].metadata.name}') + if ! $KUBECTL -n rook-ceph exec "$mgr_pod" -- \ + curl -sf http://localhost:9283/metrics 2>/dev/null | grep -q 'ceph_health_status'; then + echo "ceph mgr prometheus module not yet serving, enabling it..." + $KUBECTL -n rook-ceph exec deploy/rook-ceph-tools -- ceph mgr module enable prometheus + fi + local attempts=0 until $KUBECTL -n rook-ceph exec "$mgr_pod" -- \ curl -sf http://localhost:9283/metrics | grep -q 'ceph_health_status'; do -- 2.47.3