git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
node-proxy: add periodic heartbeats in main and reporter loops
author: Guillaume Abrioux <gabrioux@ibm.com>
Wed, 4 Feb 2026 14:15:23 +0000 (15:15 +0100)
committer: Guillaume Abrioux <gabrioux@ibm.com>
Wed, 18 Feb 2026 08:52:38 +0000 (09:52 +0100)
This logs an info message every 5 minutes so that logs show the agent
and reporter are still running when nothing else is logged.

Fixes: https://tracker.ceph.com/issues/74749
Signed-off-by: Guillaume Abrioux <gabrioux@ibm.com>
src/ceph-node-proxy/ceph_node_proxy/main.py
src/ceph-node-proxy/ceph_node_proxy/reporter.py

index dbd44391290db7491486778b49726281c14dfcf7..a7869a4119c2d1fc6fc378339d2037cac3c70672 100644 (file)
@@ -151,6 +151,8 @@ class NodeProxyManager:
         max_interval = 300
         backoff_factor = 1.5
         consecutive_failures = 0
+        heartbeat_interval = 300
+        last_heartbeat = time.monotonic()
 
         while not self.stop:
             try:
@@ -163,6 +165,13 @@ class NodeProxyManager:
                 self.log.debug(
                     "All threads are alive, next check in %ds.", check_interval
                 )
+                now = time.monotonic()
+                if now - last_heartbeat >= heartbeat_interval:
+                    self.log.info(
+                        "node-proxy running (heartbeat), next check in %ds.",
+                        heartbeat_interval,
+                    )
+                    last_heartbeat = now
             except Exception as e:
                 consecutive_failures += 1
                 self.log.error(
index 0e52f00f5ee1493dfed60bdaa1caf8a20c839664..e9754ddfc3588b79e9d03858eda1f1ce6df09853 100644 (file)
@@ -8,6 +8,7 @@ from ceph_node_proxy.util import BaseThread, get_logger, http_req
 
 DEFAULT_MAX_RETRIES = 30
 RETRY_SLEEP_SEC = 5
+HEARTBEAT_INTERVAL_SEC = 300
 
 
 class Reporter(BaseThread):
@@ -65,6 +66,7 @@ class Reporter(BaseThread):
         return False
 
     def main(self) -> None:
+        last_heartbeat = time.monotonic()
         while not self.stop:
             self.log.debug("waiting for a lock in reporter loop.")
             with self.system.lock:
@@ -85,6 +87,13 @@ class Reporter(BaseThread):
                         else:
                             self.log.debug("no diff, not sending data to the mgr.")
             self.log.debug("lock released in reporter loop.")
+            now = time.monotonic()
+            if now - last_heartbeat >= HEARTBEAT_INTERVAL_SEC:
+                self.log.info(
+                    "Reporter running (heartbeat), next check in %ds.",
+                    HEARTBEAT_INTERVAL_SEC,
+                )
+                last_heartbeat = now
             time.sleep(5)
         self.log.debug("exiting reporter loop.")
         raise SystemExit(0)