]> git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
nvmeof: refactor timer for exact frequency timing with drift correction wip-baum-20251020-01
authorAlexander Indenbaum <aindenba@redhat.com>
Mon, 20 Oct 2025 09:00:04 +0000 (12:00 +0300)
committerAlexander Indenbaum <aindenba@redhat.com>
Mon, 20 Oct 2025 09:25:45 +0000 (12:25 +0300)
- Replace relative timing with absolute time calculation to prevent drift
- Implement drift detection and correction mechanism
- Add comprehensive logging for timer firing, execution duration, drift
  and rescheduling

This refactoring ensures the nvmeof monitor client maintains exact frequency
timing over long periods, even when individual tick executions are delayed.

Signed-off-by: Alexander Indenbaum <aindenba@redhat.com>
src/nvmeof/NVMeofGwMonitorClient.cc
src/nvmeof/NVMeofGwMonitorClient.h

index c362bf21ca2a82020dd00884a269d5175b41f24d..fc8595a34184b09f9417d2cde3623af4108238f3 100644 (file)
@@ -184,7 +184,12 @@ int NVMeofGwMonitorClient::init()
 
   {
     std::lock_guard bl(beacon_lock);
-    tick();
+    // Initialize timer state for exact frequency timing
+    next_tick_time = ceph::mono_clock::now();
+    timer_active = true;
+    auto tick_period = g_conf().get_val<std::chrono::seconds>("nvmeof_mon_client_tick_period");
+    dout(4) << "Initializing timer with period " << tick_period.count() << " seconds" << dendl;
+    schedule_next_tick();
   }
 
   dout(10) << "Complete." << dendl;
@@ -288,12 +293,84 @@ void NVMeofGwMonitorClient::tick()
   disconnect_panic();
   send_beacon();
   first_beacon = false;
-  timer.add_event_after(
-      g_conf().get_val<std::chrono::seconds>("nvmeof_mon_client_tick_period").count(),
-      new LambdaContext([this](int r){
-          tick();
-      }
-  ));
+
+  // Schedule next tick with exact frequency timing
+  if (timer_active) {
+    schedule_next_tick();
+  }
+}
+
+void NVMeofGwMonitorClient::schedule_next_tick()
+{
+  ceph_assert(ceph_mutex_is_locked_by_me(beacon_lock));
+
+  if (!timer_active) {
+    return;
+  }
+
+  // Calculate next tick time based on configured interval
+  auto tick_period = g_conf().get_val<std::chrono::seconds>("nvmeof_mon_client_tick_period");
+  next_tick_time += tick_period;
+
+  // Get current time to check for drift
+  auto now = ceph::mono_clock::now();
+
+  // If we're behind schedule, adjust next_tick_time to prevent drift accumulation
+  if (next_tick_time < now) {
+    dout(1) << "Timer drift detected, adjusting next_tick_time from "
+             << next_tick_time << " to " << now << dendl;
+    next_tick_time = now;
+  }
+
+  // Schedule the next tick
+  auto cb = new LambdaContext([this](int r) {
+    std::lock_guard bl(beacon_lock);
+    if (timer_active) {
+      tick_callback();
+    }
+  });
+
+  timer.add_event_at(next_tick_time, cb);
+  auto delay_ns = (next_tick_time - now).count();
+  dout(10) << "Rescheduled next tick at " << next_tick_time
+           << " (delay: " << delay_ns << " ns, "
+           << (delay_ns / 1000000.0) << " ms)" << dendl;
+}
+
+void NVMeofGwMonitorClient::tick_callback()
+{
+  ceph_assert(ceph_mutex_is_locked_by_me(beacon_lock));
+
+  if (!timer_active) {
+    dout(10) << "Timer fired but timer_active=false, ignoring" << dendl;
+    return;
+  }
+
+  auto start_time = ceph::mono_clock::now();
+  dout(10) << "Timer fired at " << start_time << dendl;
+
+  // Perform the actual tick work
+  connect_panic();
+  disconnect_panic();
+  send_beacon();
+  first_beacon = false;
+
+  auto end_time = ceph::mono_clock::now();
+  auto duration = end_time - start_time;
+  auto duration_ms = duration.count() / 1000000.0;
+  dout(10) << "Tick execution completed in " << duration.count() << " ns (" << duration_ms << " ms)" << dendl;
+
+  // Warn if execution time is significant compared to tick period
+  auto tick_period = g_conf().get_val<std::chrono::seconds>("nvmeof_mon_client_tick_period");
+  auto tick_period_ms = tick_period.count() * 1000.0;
+  if (duration_ms > tick_period_ms * 0.1) {  // Warn if > 10% of tick period
+    dout(1) << "WARNING: Tick execution time (" << duration_ms << " ms) is "
+            << (duration_ms / tick_period_ms * 100) << "% of tick period ("
+            << tick_period_ms << " ms)" << dendl;
+  }
+
+  // Schedule next tick
+  schedule_next_tick();
 }
 
 void NVMeofGwMonitorClient::shutdown()
@@ -306,6 +383,7 @@ void NVMeofGwMonitorClient::shutdown()
   // stop sending beacon first, I use monc to talk with monitors
   {
     std::lock_guard bl(beacon_lock);
+    timer_active = false;
     timer.shutdown();
   }
 
index b1ca5c94debb1b0a85b09de2b9234ab3d49a2aca..380a202f3c626a5699b18b5eb877badcb5c3bcfd 100644 (file)
@@ -21,6 +21,7 @@
 #include "common/Finisher.h"
 #include "common/Timer.h"
 #include "common/LogClient.h"
+#include "common/ceph_time.h"
 
 #include "mon/MonClient.h"
 #include "osdc/Objecter.h"
@@ -71,11 +72,19 @@ protected:
   ceph::mutex beacon_lock = ceph::make_mutex("NVMeofGw::beacon_lock");
   SafeTimer timer;
 
+  // Timer state for exact frequency timing
+  ceph::mono_clock::time_point next_tick_time;
+  bool timer_active = false;
+
   int orig_argc;
   const char **orig_argv;
 
   void send_config_beacon(); 
   void send_beacon();
+
+  // Timer management for exact frequency
+  void schedule_next_tick();
+  void tick_callback();
  
 public:
   NVMeofGwMonitorClient(int argc, const char **argv);