git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mds/quiesce: prevent an overflow of the wait duration 56542/head
author Leonid Usov <leonid.usov@ibm.com>
Thu, 28 Mar 2024 05:32:26 +0000 (01:32 -0400)
committer Leonid Usov <leonid.usov@ibm.com>
Tue, 2 Apr 2024 22:54:48 +0000 (18:54 -0400)
QuiesceTimeInterval::max() may overflow inside of a call to
std::condition_variable::wait_for, making the call time out
immediately and resulting in a busy-loop.

The solution is to cap the wait duration to a value which can
certainly fit in whichever clock the standard library uses internally.

Fixes: https://tracker.ceph.com/issues/65276
Signed-off-by: Leonid Usov <leonid.usov@ibm.com>
src/mds/QuiesceDbManager.cc

index bcbcbeed0b142b48db1fd8cccb858e1876aae3ec..d070434dbff3124c2c093b15ec3377e36a15d244 100644 (file)
@@ -71,11 +71,27 @@ void* QuiesceDbManager::quiesce_db_thread_main()
   dout(5) << "Entering the main thread" << dendl;
   bool keep_working = true;
   while (keep_working) {
-
-    auto db_age = db.get_age();
-
-    if (!db_thread_has_work() && next_event_at_age > db_age) {
-      submit_condition.wait_for(ls, next_event_at_age - db_age);
+    // QuiesceTimeInterval::max() value of next_event_at_age
+    // may cause an overflow in some stdlib implementations when calling
+    // std::condition_variable::wait_for(ls, next_event_at_age - db_age).
+    // The overflow can make the call timeout immediately,
+    // resulting in a busy-loop.
+    // The solution is to cap the wait duration to a value which can
+    // certainly fit in whichever clock std library is using internally.
+    const auto max_wait = std::chrono::duration_cast<QuiesceTimeInterval>(
+        std::chrono::seconds(10)
+    );
+
+    while (!db_thread_has_work()) {
+      auto db_age = db.get_age();
+      if (next_event_at_age <= db_age) {
+        break;
+      }
+      auto timeout = std::min(max_wait, next_event_at_age - db_age);
+      auto wait_result = submit_condition.wait_for(ls, timeout);
+      if (std::cv_status::timeout == wait_result) {
+        dout(20) << "db idle, age: " << db_age << dendl;
+      }
     }
 
     auto [is_member, should_exit] = membership_upkeep();