From 508e870ee383265b8489e18a4c73854616a4110a Mon Sep 17 00:00:00 2001 From: Leonid Usov Date: Thu, 28 Mar 2024 01:32:26 -0400 Subject: [PATCH] mds/quiesce: prevent an overflow of the wait duration A wait duration of QuiesceTimeInterval::max() may overflow inside a call to std::condition_variable::wait_for, making the call time out immediately and resulting in a busy-loop. The solution is to cap the wait duration to a value which can certainly fit in whichever clock the standard library is using internally. Fixes: https://tracker.ceph.com/issues/65276 Signed-off-by: Leonid Usov --- src/mds/QuiesceDbManager.cc | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/src/mds/QuiesceDbManager.cc b/src/mds/QuiesceDbManager.cc index bcbcbeed0b142..d070434dbff31 100644 --- a/src/mds/QuiesceDbManager.cc +++ b/src/mds/QuiesceDbManager.cc @@ -71,11 +71,27 @@ void* QuiesceDbManager::quiesce_db_thread_main() dout(5) << "Entering the main thread" << dendl; bool keep_working = true; while (keep_working) { - - auto db_age = db.get_age(); - - if (!db_thread_has_work() && next_event_at_age > db_age) { - submit_condition.wait_for(ls, next_event_at_age - db_age); + // QuiesceTimeInterval::max() value of next_event_at_age + // may cause an overflow in some stdlib implementations when calling + // std::condition_variable::wait_for(ls, next_event_at_age - db_age). + // The overflow can make the call timeout immediately, + // resulting in a busy-loop. + // The solution is to cap the wait duration to a value which can + // certainly fit in whichever clock std library is using internally. 
+ const auto max_wait = std::chrono::duration_cast<QuiesceTimeInterval>( + std::chrono::seconds(10) + ); + + while (!db_thread_has_work()) { + auto db_age = db.get_age(); + if (next_event_at_age <= db_age) { + break; + } + auto timeout = std::min(max_wait, next_event_at_age - db_age); + auto wait_result = submit_condition.wait_for(ls, timeout); + if (std::cv_status::timeout == wait_result) { + dout(20) << "db idle, age: " << db_age << dendl; + } } auto [is_member, should_exit] = membership_upkeep(); -- 2.39.5