We've been experiencing timer hangs with mingw-llvm.
std::condition_variable::wait_for was returning a few microseconds
before the requested time and then hanging when called with a
small interval (e.g. microseconds).
This was affecting the OSD periodic tick, which would hang after
a while (20m up to 2h).
The issue can be reproduced with a timer loop and a small interval
(e.g. 40us), in which case the timer is likely to hang after about
10s.
We're adding some tests, while the actual mingw-llvm issue will
be mitigated in a separate commit.
Signed-off-by: Lucian Petrut <lpetrut@cloudbasesolutions.com>
return ret;
}
+class TestLoopContext : public Context
+{
+public:
+ explicit TestLoopContext(SafeTimer &_t,
+ ceph::mono_clock::time_point _deadline,
+ double _interval,
+ bool& _test_finished)
+ : t(_t)
+ , deadline(_deadline)
+ , interval(_interval)
+ , test_finished(_test_finished)
+ {
+ }
+
+ ~TestLoopContext() override
+ {
+ }
+
+ void finish(int r) override
+ {
+ if (ceph::mono_clock::now() > deadline) {
+ test_finished = true;
+ } else {
+ // We have to create a new context.
+ TestLoopContext* new_ctx = new TestLoopContext(
+ t,
+ deadline,
+ interval,
+ test_finished);
+ t.add_event_after(ceph::make_timespan(interval), new_ctx);
+ }
+ }
+
+protected:
+ SafeTimer &t;
+ ceph::mono_clock::time_point deadline;
+ double interval;
+ bool& test_finished;
+};
+
+static int safe_timer_loop_test(SafeTimer &safe_timer,
+ ceph::mutex& safe_timer_lock) {
+ // TODO: consider using gtest.
+ cout << __PRETTY_FUNCTION__ << std::endl;
+
+ bool test_finished = false;
+ int test_duration = 10;
+ double tick_interval = 0.00004;
+ auto deadline = ceph::mono_clock::now() +
+ std::chrono::seconds(test_duration);
+ TestLoopContext* ctx = new TestLoopContext(
+ safe_timer,
+ deadline,
+ tick_interval,
+ test_finished);
+
+ safe_timer.add_event_after(ceph::make_timespan(tick_interval), ctx);
+
+ std::this_thread::sleep_for(std::chrono::seconds(test_duration + 2));
+ if (!test_finished) {
+ std::cerr << "The timer loop job didn't complete, it probably hung."
+ << std::endl;
+ return -1;
+ }
+}
+
int main(int argc, const char **argv)
{
auto args = argv_to_vec(argc, argv);
if (ret)
goto done;
+ ret = safe_timer_loop_test(safe_timer, safe_timer_lock);
+ if (ret)
+ goto done;
+
done:
safe_timer.shutdown();
print_status(argv[0], ret);
add_executable(unittest_ceph_timer test_ceph_timer.cc)
add_ceph_unittest(unittest_ceph_timer)
-target_link_libraries(unittest_ceph_timer ceph-common)
+target_link_libraries(unittest_ceph_timer global ceph-common)
add_executable(unittest_option test_option.cc)
target_link_libraries(unittest_option ceph-common GTest::Main)
#include <gtest/gtest.h>
+#include "common/ceph_time.h"
#include "common/ceph_timer.h"
using namespace std::literals;
EXPECT_FALSE(timer.cancel_event(e));
}
}
+
+template<typename TC>
+void tick(ceph::timer<TC>* t,
+ typename TC::time_point deadline,
+ double interval,
+ bool* test_finished,
+ typename TC::time_point* last_tick,
+ uint64_t* tick_count,
+ typename TC::time_point* last_tp,
+ typename TC::time_point* second_last_tp) {
+ *last_tick = TC::now();
+ *tick_count += 1;
+
+ if (TC::now() > deadline) {
+ *test_finished = true;
+ } else {
+ auto tp = TC::now() + ceph::make_timespan(interval);
+
+ *second_last_tp = *last_tp;
+ *last_tp = tp;
+
+ t->reschedule_me(tp);
+ }
+}
}
TEST(RunSome, Steady)
{
cancel_all<std::chrono::system_clock>();
}
+
+TEST(TimerLoopTest, TimerLoop)
+{
+ using TC = ceph::coarse_mono_clock;
+ ceph::timer<TC> t;
+ bool test_finished = false;
+ int test_duration = 10;
+ double tick_interval = 0.00004;
+ auto last_tick = TC::now();
+ uint64_t tick_count = 0;
+
+ auto last_tp = TC::now();
+ auto second_last_tp = TC::now();
+ TC::time_point test_deadline = TC::now() +
+ std::chrono::seconds(test_duration);
+
+ t.add_event(
+ ceph::make_timespan(tick_interval),
+ &tick<TC>,
+ &t,
+ test_deadline,
+ tick_interval,
+ &test_finished,
+ &last_tick,
+ &tick_count,
+ &last_tp,
+ &second_last_tp);
+
+ std::this_thread::sleep_for(std::chrono::seconds(test_duration + 2));
+
+ ASSERT_TRUE(test_finished)
+ << "The timer job didn't complete, it probably hung. "
+ << "Time since last tick: "
+ << (TC::now() - last_tick)
+ << ". Tick count: " << tick_count
+ << ". Last wait tp: " << last_tp
+ << ", second last tp: " << second_last_tp
+ << ", deadline tp: " << test_deadline;
+}