git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mds/quiesce: prevent an overflow of the wait duration 56668/head
author Leonid Usov <leonid.usov@ibm.com>
Thu, 28 Mar 2024 05:32:26 +0000 (01:32 -0400)
committer Patrick Donnelly <pdonnell@redhat.com>
Wed, 3 Apr 2024 13:46:04 +0000 (09:46 -0400)
QuiesceTimeInterval::max() may overflow inside of a call to
std::condition_variable::wait_for, making the call time out
immediately and resulting in a busy-loop.

The solution is to cap the wait duration to a value which can
certainly fit in whichever clock the std library is using internally.

Fixes: https://tracker.ceph.com/issues/65276
Signed-off-by: Leonid Usov <leonid.usov@ibm.com>
(cherry picked from commit 508e870ee383265b8489e18a4c73854616a4110a)

src/mds/QuiesceDbManager.cc

index 6fccaacf10c475c97a1b57b8f286796de3a8fe43..c1a2cf0d0401b47319f8402fc778a3565eee4a32 100644 (file)
@@ -71,11 +71,27 @@ void* QuiesceDbManager::quiesce_db_thread_main()
   dout(5) << "Entering the main thread" << dendl;
   bool keep_working = true;
   while (keep_working) {
-
-    auto db_age = db.get_age();
-
-    if (!db_thread_has_work() && next_event_at_age > db_age) {
-      submit_condition.wait_for(ls, next_event_at_age - db_age);
+    // QuiesceTimeInterval::max() value of next_event_at_age
+    // may cause an overflow in some stdlib implementations when calling
+    // std::condition_variable::wait_for(ls, next_event_at_age - db_age).
+    // The overflow can make the call timeout immediately,
+    // resulting in a busy-loop.
+    // The solution is to cap the wait duration to a value which can
+    // certainly fit in whichever clock std library is using internally.
+    const auto max_wait = std::chrono::duration_cast<QuiesceTimeInterval>(
+        std::chrono::seconds(10)
+    );
+
+    while (!db_thread_has_work()) {
+      auto db_age = db.get_age();
+      if (next_event_at_age <= db_age) {
+        break;
+      }
+      auto timeout = std::min(max_wait, next_event_at_age - db_age);
+      auto wait_result = submit_condition.wait_for(ls, timeout);
+      if (std::cv_status::timeout == wait_result) {
+        dout(20) << "db idle, age: " << db_age << dendl;
+      }
     }
 
     auto [is_member, should_exit] = membership_upkeep();