From: Lucian Petrut Date: Thu, 4 May 2023 13:16:56 +0000 (+0000) Subject: test: add timer loop tests X-Git-Tag: v19.0.0~575^2~5 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=09a8cb0a6afca48501b36c5c9c2fcaf082860a4c;p=ceph.git test: add timer loop tests We've been experiencing timer hangs with mingw-llvm. std::condition_variable::wait_for was returning a few microseconds before the requested time and then hanging when called with a small interval (e.g. microseconds). This was affecting the OSD periodic tick, which would hang after a while (20m up to 2h). The issue can be reproduced with a timer loop and a small interval (e.g. 40us), in which case the timer is likely to hang after about 10s. We're adding some tests, while the actual mingw-llvm issue will be mitigated in a separate commit. Signed-off-by: Lucian Petrut --- diff --git a/src/test/TestTimers.cc b/src/test/TestTimers.cc index 78bdfd3a41bde..a561f8f509779 100644 --- a/src/test/TestTimers.cc +++ b/src/test/TestTimers.cc @@ -244,6 +244,72 @@ static int safe_timer_cancellation_test(SafeTimer &safe_timer, return ret; } +class TestLoopContext : public Context +{ +public: + explicit TestLoopContext(SafeTimer &_t, + ceph::mono_clock::time_point _deadline, + double _interval, + bool& _test_finished) + : t(_t) + , deadline(_deadline) + , interval(_interval) + , test_finished(_test_finished) + { + } + + ~TestLoopContext() override + { + } + + void finish(int r) override + { + if (ceph::mono_clock::now() > deadline) { + test_finished = true; + } else { + // We have to create a new context. + TestLoopContext* new_ctx = new TestLoopContext( + t, + deadline, + interval, + test_finished); + t.add_event_after(ceph::make_timespan(interval), new_ctx); + } + } + +protected: + SafeTimer &t; + ceph::mono_clock::time_point deadline; + double interval; + bool& test_finished; +}; + +static int safe_timer_loop_test(SafeTimer &safe_timer, + ceph::mutex& safe_timer_lock) { + // TODO: consider using gtest. + cout << __PRETTY_FUNCTION__ << std::endl; + + bool test_finished = false; + int test_duration = 10; + double tick_interval = 0.00004; + auto deadline = ceph::mono_clock::now() + + std::chrono::seconds(test_duration); + TestLoopContext* ctx = new TestLoopContext( + safe_timer, + deadline, + tick_interval, + test_finished); + + safe_timer.add_event_after(ceph::make_timespan(tick_interval), ctx); + + std::this_thread::sleep_for(std::chrono::seconds(test_duration + 2)); + if (!test_finished) { + std::cerr << "The timer loop job didn't complete, it probably hung." + << std::endl; + return -1; + } +} + int main(int argc, const char **argv) { auto args = argv_to_vec(argc, argv); @@ -274,6 +340,10 @@ int main(int argc, const char **argv) if (ret) goto done; + ret = safe_timer_loop_test(safe_timer, safe_timer_lock); + if (ret) + goto done; + done: safe_timer.shutdown(); print_status(argv[0], ret); diff --git a/src/test/common/CMakeLists.txt b/src/test/common/CMakeLists.txt index c68d8639f4c55..c044daf662ab7 100644 --- a/src/test/common/CMakeLists.txt +++ b/src/test/common/CMakeLists.txt @@ -372,7 +372,7 @@ add_ceph_unittest(unittest_cdc) add_executable(unittest_ceph_timer test_ceph_timer.cc) add_ceph_unittest(unittest_ceph_timer) -target_link_libraries(unittest_ceph_timer ceph-common) +target_link_libraries(unittest_ceph_timer global ceph-common) add_executable(unittest_option test_option.cc) target_link_libraries(unittest_option ceph-common GTest::Main) diff --git a/src/test/common/test_ceph_timer.cc b/src/test/common/test_ceph_timer.cc index 12391c7ea71b5..925f05bd1f9f0 100644 --- a/src/test/common/test_ceph_timer.cc +++ b/src/test/common/test_ceph_timer.cc @@ -18,6 +18,7 @@ #include +#include "common/ceph_time.h" #include "common/ceph_timer.h" using namespace std::literals; @@ -133,6 +134,30 @@ void cancellation() EXPECT_FALSE(timer.cancel_event(e)); } } + +template +void tick(ceph::timer* t, + typename TC::time_point deadline, + double interval, + bool* test_finished, + typename TC::time_point* last_tick, + uint64_t* tick_count, + typename TC::time_point* last_tp, + typename TC::time_point* second_last_tp) { + *last_tick = TC::now(); + *tick_count += 1; + + if (TC::now() > deadline) { + *test_finished = true; + } else { + auto tp = TC::now() + ceph::make_timespan(interval); + + *second_last_tp = *last_tp; + *last_tp = tp; + + t->reschedule_me(tp); + } +} } TEST(RunSome, Steady) @@ -161,3 +186,42 @@ TEST(CancelAll, Wall) { cancel_all(); } + +TEST(TimerLoopTest, TimerLoop) +{ + using TC = ceph::coarse_mono_clock; + ceph::timer t; + bool test_finished = false; + int test_duration = 10; + double tick_interval = 0.00004; + auto last_tick = TC::now(); + uint64_t tick_count = 0; + + auto last_tp = TC::now(); + auto second_last_tp = TC::now(); + TC::time_point test_deadline = TC::now() + + std::chrono::seconds(test_duration); + + t.add_event( + ceph::make_timespan(tick_interval), + &tick, + &t, + test_deadline, + tick_interval, + &test_finished, + &last_tick, + &tick_count, + &last_tp, + &second_last_tp); + + std::this_thread::sleep_for(std::chrono::seconds(test_duration + 2)); + + ASSERT_TRUE(test_finished) + << "The timer job didn't complete, it probably hung. " + << "Time since last tick: " + << (TC::now() - last_tick) + << ". Tick count: " << tick_count + << ". Last wait tp: " << last_tp + << ", second last tp: " << second_last_tp + << ", deadline tp: " << test_deadline; +}