]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
test: add timer loop tests
authorLucian Petrut <lpetrut@cloudbasesolutions.com>
Thu, 4 May 2023 13:16:56 +0000 (13:16 +0000)
committerLucian Petrut <lpetrut@cloudbasesolutions.com>
Wed, 30 Aug 2023 12:59:00 +0000 (12:59 +0000)
We've been experiencing timer hangs with mingw-llvm.
std::condition_variable::wait_for was returning a few microseconds
before the requested time and then hanging when called with a
small interval (e.g. microseconds).

This was affecting the OSD periodic tick, which would hang after
a while (20m up to 2h).

The issue can be reproduced with a timer loop and a small interval
(e.g. 40us), in which case the timer is likely to hang after about
10s.

We're adding some tests, while the actual mingw-llvm issue will
be mitigated in a separate commit.

Signed-off-by: Lucian Petrut <lpetrut@cloudbasesolutions.com>
src/test/TestTimers.cc
src/test/common/CMakeLists.txt
src/test/common/test_ceph_timer.cc

index 78bdfd3a41bde15cbc98de564fb2646e27814fdb..a561f8f50977900d299aefd23ff8055d63b53684 100644 (file)
@@ -244,6 +244,72 @@ static int safe_timer_cancellation_test(SafeTimer &safe_timer,
   return ret;
 }
 
+class TestLoopContext : public Context
+{
+public:
+  explicit TestLoopContext(SafeTimer &_t,
+                           ceph::mono_clock::time_point _deadline,
+                           double _interval,
+                           bool& _test_finished)
+    : t(_t)
+    , deadline(_deadline)
+    , interval(_interval)
+    , test_finished(_test_finished)
+  {
+  }
+
+  ~TestLoopContext() override
+  {
+  }
+
+  void finish(int r) override
+  {
+    if (ceph::mono_clock::now() > deadline) {
+      test_finished = true;
+    } else {
+      // We have to create a new context.
+      TestLoopContext* new_ctx = new TestLoopContext(
+        t,
+        deadline,
+        interval,
+        test_finished);
+      t.add_event_after(ceph::make_timespan(interval), new_ctx);
+    }
+  }
+
+protected:
+  SafeTimer &t;
+  ceph::mono_clock::time_point deadline;
+  double interval;
+  bool& test_finished;
+};
+
+static int safe_timer_loop_test(SafeTimer &safe_timer,
+                                ceph::mutex& safe_timer_lock) {
+  // TODO: consider using gtest.
+  cout << __PRETTY_FUNCTION__ << std::endl;
+
+  bool test_finished = false;
+  int test_duration = 10;
+  double tick_interval = 0.00004;
+  auto deadline = ceph::mono_clock::now() +
+                  std::chrono::seconds(test_duration);
+  TestLoopContext* ctx = new TestLoopContext(
+    safe_timer,
+    deadline,
+    tick_interval,
+    test_finished);
+
+  safe_timer.add_event_after(ceph::make_timespan(tick_interval), ctx);
+
+  std::this_thread::sleep_for(std::chrono::seconds(test_duration + 2));
+  if (!test_finished) {
+    std::cerr << "The timer loop job didn't complete, it probably hung."
+              << std::endl;
+    return -1;
+  }
+}
+
 int main(int argc, const char **argv)
 {
   auto args = argv_to_vec(argc, argv);
@@ -274,6 +340,10 @@ int main(int argc, const char **argv)
   if (ret)
     goto done;
 
+  ret = safe_timer_loop_test(safe_timer, safe_timer_lock);
+  if (ret)
+    goto done;
+
 done:
   safe_timer.shutdown();
   print_status(argv[0], ret);
index c68d8639f4c55c9af0d3d923c6356f1a9de13aab..c044daf662ab7f25341ddd1e060905d491f25ef4 100644 (file)
@@ -372,7 +372,7 @@ add_ceph_unittest(unittest_cdc)
 
 add_executable(unittest_ceph_timer test_ceph_timer.cc)
 add_ceph_unittest(unittest_ceph_timer)
-target_link_libraries(unittest_ceph_timer ceph-common)
+target_link_libraries(unittest_ceph_timer global ceph-common)
 
 add_executable(unittest_option test_option.cc)
 target_link_libraries(unittest_option ceph-common GTest::Main)
index 12391c7ea71b5d94151f3b23b18e7b7578330a2e..925f05bd1f9f071dfaa0e15a164dcf569b47c6cc 100644 (file)
@@ -18,6 +18,7 @@
 
 #include <gtest/gtest.h>
 
+#include "common/ceph_time.h"
 #include "common/ceph_timer.h"
 
 using namespace std::literals;
@@ -133,6 +134,30 @@ void cancellation()
     EXPECT_FALSE(timer.cancel_event(e));
   }
 }
+
+template<typename TC>
+void tick(ceph::timer<TC>* t,
+          typename TC::time_point deadline,
+          double interval,
+          bool* test_finished,
+          typename TC::time_point* last_tick,
+          uint64_t* tick_count,
+          typename TC::time_point* last_tp,
+          typename TC::time_point* second_last_tp) {
+  *last_tick = TC::now();
+  *tick_count += 1;
+
+  if (TC::now() > deadline) {
+    *test_finished = true;
+  } else {
+    auto tp = TC::now() + ceph::make_timespan(interval);
+
+    *second_last_tp = *last_tp;
+    *last_tp = tp;
+
+    t->reschedule_me(tp);
+  }
+}
 }
 
 TEST(RunSome, Steady)
@@ -161,3 +186,42 @@ TEST(CancelAll, Wall)
 {
   cancel_all<std::chrono::system_clock>();
 }
+
+TEST(TimerLoopTest, TimerLoop)
+{
+  using TC = ceph::coarse_mono_clock;
+  ceph::timer<TC> t;
+  bool test_finished = false;
+  int test_duration = 10;
+  double tick_interval = 0.00004;
+  auto last_tick = TC::now();
+  uint64_t tick_count = 0;
+
+  auto last_tp = TC::now();
+  auto second_last_tp = TC::now();
+  TC::time_point test_deadline = TC::now() +
+                                 std::chrono::seconds(test_duration);
+
+  t.add_event(
+    ceph::make_timespan(tick_interval),
+    &tick<TC>,
+    &t,
+    test_deadline,
+    tick_interval,
+    &test_finished,
+    &last_tick,
+    &tick_count,
+    &last_tp,
+    &second_last_tp);
+
+  std::this_thread::sleep_for(std::chrono::seconds(test_duration + 2));
+
+  ASSERT_TRUE(test_finished)
+    << "The timer job didn't complete, it probably hung. "
+    << "Time since last tick: "
+    << (TC::now() - last_tick)
+    << ". Tick count: " << tick_count
+    << ". Last wait tp: " << last_tp
+    << ", second last tp: " << second_last_tp
+    << ", deadline tp: " << test_deadline;
+}