From b25d6df27778d3a90aae85dae730b090342f40b1 Mon Sep 17 00:00:00 2001 From: Alexander Indenbaum Date: Mon, 20 Oct 2025 12:00:04 +0300 Subject: [PATCH] nvmeof: refactor timer for exact frequency timing with drift correction - Replace relative timing with absolute time calculation to prevent drift - Implement drift detection and correction mechanism - Add comprehensive logging for timer firing, execution duration, drift and rescheduling This refactoring ensures the nvmeof monitor client maintains exact frequency timing over long periods, even when individual tick executions are delayed. Signed-off-by: Alexander Indenbaum --- src/nvmeof/NVMeofGwMonitorClient.cc | 92 ++++++++++++++++++++++++++--- src/nvmeof/NVMeofGwMonitorClient.h | 9 +++ 2 files changed, 94 insertions(+), 7 deletions(-) diff --git a/src/nvmeof/NVMeofGwMonitorClient.cc b/src/nvmeof/NVMeofGwMonitorClient.cc index c362bf21ca2..fc8595a3418 100644 --- a/src/nvmeof/NVMeofGwMonitorClient.cc +++ b/src/nvmeof/NVMeofGwMonitorClient.cc @@ -184,7 +184,12 @@ int NVMeofGwMonitorClient::init() { std::lock_guard bl(beacon_lock); - tick(); + // Initialize timer state for exact frequency timing + next_tick_time = ceph::mono_clock::now(); + timer_active = true; + auto tick_period = g_conf().get_val("nvmeof_mon_client_tick_period"); + dout(4) << "Initializing timer with period " << tick_period.count() << " seconds" << dendl; + schedule_next_tick(); } dout(10) << "Complete." << dendl; @@ -288,12 +293,84 @@ void NVMeofGwMonitorClient::tick() disconnect_panic(); send_beacon(); first_beacon = false; - timer.add_event_after( - g_conf().get_val("nvmeof_mon_client_tick_period").count(), - new LambdaContext([this](int r){ - tick(); - } - )); + + // Schedule next tick with exact frequency timing + if (timer_active) { + schedule_next_tick(); + } +} + +void NVMeofGwMonitorClient::schedule_next_tick() +{ + ceph_assert(ceph_mutex_is_locked_by_me(beacon_lock)); + + if (!timer_active) { + return; + } + + // Calculate next tick time based on configured interval + auto tick_period = g_conf().get_val("nvmeof_mon_client_tick_period"); + next_tick_time += tick_period; + + // Get current time to check for drift + auto now = ceph::mono_clock::now(); + + // If we're behind schedule, adjust next_tick_time to prevent drift accumulation + if (next_tick_time < now) { + dout(1) << "Timer drift detected, adjusting next_tick_time from " + << next_tick_time << " to " << now << dendl; + next_tick_time = now; + } + + // Schedule the next tick + auto cb = new LambdaContext([this](int r) { + std::lock_guard bl(beacon_lock); + if (timer_active) { + tick_callback(); + } + }); + + timer.add_event_at(next_tick_time, cb); + auto delay_ns = (next_tick_time - now).count(); + dout(10) << "Rescheduled next tick at " << next_tick_time + << " (delay: " << delay_ns << " ns, " + << (delay_ns / 1000000.0) << " ms)" << dendl; +} + +void NVMeofGwMonitorClient::tick_callback() +{ + ceph_assert(ceph_mutex_is_locked_by_me(beacon_lock)); + + if (!timer_active) { + dout(10) << "Timer fired but timer_active=false, ignoring" << dendl; + return; + } + + auto start_time = ceph::mono_clock::now(); + dout(10) << "Timer fired at " << start_time << dendl; + + // Perform the actual tick work + connect_panic(); + disconnect_panic(); + send_beacon(); + first_beacon = false; + + auto end_time = ceph::mono_clock::now(); + auto duration = end_time - start_time; + auto duration_ms = duration.count() / 1000000.0; + dout(10) << "Tick execution completed in " << duration.count() << " ns (" << duration_ms << " ms)" << dendl; + + // Warn if execution time is significant compared to tick period + auto tick_period = g_conf().get_val("nvmeof_mon_client_tick_period"); + auto tick_period_ms = tick_period.count() * 1000.0; + if (duration_ms > tick_period_ms * 0.1) { // Warn if > 10% of tick period + dout(1) << "WARNING: Tick execution time (" << duration_ms << " ms) is " + << (duration_ms / tick_period_ms * 100) << "% of tick period (" + << tick_period_ms << " ms)" << dendl; + } + + // Schedule next tick + schedule_next_tick(); } void NVMeofGwMonitorClient::shutdown() @@ -306,6 +383,7 @@ void NVMeofGwMonitorClient::shutdown() // stop sending beacon first, I use monc to talk with monitors { std::lock_guard bl(beacon_lock); + timer_active = false; timer.shutdown(); } diff --git a/src/nvmeof/NVMeofGwMonitorClient.h b/src/nvmeof/NVMeofGwMonitorClient.h index b1ca5c94deb..380a202f3c6 100644 --- a/src/nvmeof/NVMeofGwMonitorClient.h +++ b/src/nvmeof/NVMeofGwMonitorClient.h @@ -21,6 +21,7 @@ #include "common/Finisher.h" #include "common/Timer.h" #include "common/LogClient.h" +#include "common/ceph_time.h" #include "mon/MonClient.h" #include "osdc/Objecter.h" @@ -71,11 +72,19 @@ protected: ceph::mutex beacon_lock = ceph::make_mutex("NVMeofGw::beacon_lock"); SafeTimer timer; + // Timer state for exact frequency timing + ceph::mono_clock::time_point next_tick_time; + bool timer_active = false; + int orig_argc; const char **orig_argv; void send_config_beacon(); void send_beacon(); + + // Timer management for exact frequency + void schedule_next_tick(); + void tick_callback(); public: NVMeofGwMonitorClient(int argc, const char **argv); -- 2.39.5