From: Guillaume Abrioux
Date: Wed, 4 Feb 2026 14:15:23 +0000 (+0100)
Subject: node-proxy: add periodic heartbeats in main and reporter loops
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=6bc55026b5554c33b57f727d79caabc0246ccd5e;p=ceph.git

node-proxy: add periodic heartbeats in main and reporter loops

This logs an info message every 5 minutes so that logs show the agent and reporter are still running when nothing else is logged.

Fixes: https://tracker.ceph.com/issues/74749

Signed-off-by: Guillaume Abrioux
---

diff --git a/src/ceph-node-proxy/ceph_node_proxy/main.py b/src/ceph-node-proxy/ceph_node_proxy/main.py
index dbd44391290..a7869a4119c 100644
--- a/src/ceph-node-proxy/ceph_node_proxy/main.py
+++ b/src/ceph-node-proxy/ceph_node_proxy/main.py
@@ -151,6 +151,8 @@ class NodeProxyManager:
         max_interval = 300
         backoff_factor = 1.5
         consecutive_failures = 0
+        heartbeat_interval = 300
+        last_heartbeat = time.monotonic()
 
         while not self.stop:
             try:
@@ -163,6 +165,13 @@ class NodeProxyManager:
                 self.log.debug(
                     "All threads are alive, next check in %ds.", check_interval
                 )
+                now = time.monotonic()
+                if now - last_heartbeat >= heartbeat_interval:
+                    self.log.info(
+                        "node-proxy running (heartbeat), next check in %ds.",
+                        heartbeat_interval,
+                    )
+                    last_heartbeat = now
             except Exception as e:
                 consecutive_failures += 1
                 self.log.error(
diff --git a/src/ceph-node-proxy/ceph_node_proxy/reporter.py b/src/ceph-node-proxy/ceph_node_proxy/reporter.py
index 0e52f00f5ee..e9754ddfc35 100644
--- a/src/ceph-node-proxy/ceph_node_proxy/reporter.py
+++ b/src/ceph-node-proxy/ceph_node_proxy/reporter.py
@@ -8,6 +8,7 @@ from ceph_node_proxy.util import BaseThread, get_logger, http_req
 
 DEFAULT_MAX_RETRIES = 30
 RETRY_SLEEP_SEC = 5
+HEARTBEAT_INTERVAL_SEC = 300
 
 
 class Reporter(BaseThread):
@@ -65,6 +66,7 @@ class Reporter(BaseThread):
         return False
 
     def main(self) -> None:
+        last_heartbeat = time.monotonic()
         while not self.stop:
             self.log.debug("waiting for a lock in reporter loop.")
             with self.system.lock:
@@ -85,6 +87,13 @@ class Reporter(BaseThread):
                         else:
                             self.log.debug("no diff, not sending data to the mgr.")
             self.log.debug("lock released in reporter loop.")
+            now = time.monotonic()
+            if now - last_heartbeat >= HEARTBEAT_INTERVAL_SEC:
+                self.log.info(
+                    "Reporter running (heartbeat), next check in %ds.",
+                    HEARTBEAT_INTERVAL_SEC,
+                )
+                last_heartbeat = now
             time.sleep(5)
         self.log.debug("exiting reporter loop.")
         raise SystemExit(0)