From: Nitzan Mordechai Date: Wed, 26 Nov 2025 14:36:42 +0000 (+0000) Subject: aio_cxx: Fix mutual deadlock in PoolEIOFlag test X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=9da2d94dd46adaae8926be3aeff33f3d67b17741;p=ceph.git aio_cxx: Fix mutual deadlock in PoolEIOFlag test The LibRadosAio.PoolEIOFlag test was unstable; it was hanging due to: - Deadlock: The main thread held the shared mutex ('my_lock') while calling thread join. This created a mutual deadlock. * Fix: Mutex is unlocked before thread join using RAII scopes. - Also convert to std::jthread and drop the join Fixes: https://tracker.ceph.com/issues/73946 Signed-off-by: Nitzan Mordechai --- diff --git a/src/test/librados/aio_cxx.cc b/src/test/librados/aio_cxx.cc index ab26d59b2ebd..fa4e686603d8 100644 --- a/src/test/librados/aio_cxx.cc +++ b/src/test/librados/aio_cxx.cc @@ -34,7 +34,7 @@ class AioTestDataPP { public: AioTestDataPP() - : m_init(false), + : m_init(false), m_oid("foo") { } @@ -2422,25 +2422,22 @@ TEST(LibRadosAio, PoolEIOFlag) { bufferlist bl; bl.append("some data"); - std::thread *t = nullptr; + std::unique_ptr t; std::atomic missed_eio{false}; unsigned max = 1000; unsigned timeout = max * 10; unsigned long i = 1; - my_lock.lock(); for (; min_failed == 0 && i <= timeout; ++i) { io_info *info = new io_info; info->i = i; info->c = Rados::aio_create_completion(); info->c->set_complete_callback((void*)info, pool_io_callback); - inflight.insert(i); - my_lock.unlock(); int r = test_data.m_ioctx.aio_write(test_data.m_oid, info->c, bl, bl.length(), 0); - //cout << "start " << i << " r = " << r << std::endl; - if (i == max / 2) { - t = new std::thread([&] { + // Trigger EIO after 100 ops have been submitted + if (i == 100) { + t = std::make_unique([&] { cout << "sending pool EIO time: " << ceph_clock_now() << std::endl; ASSERT_EQ(0, test_data.m_cluster.mon_command( fmt::format(R"({{ @@ -2465,8 +2462,9 @@ TEST(LibRadosAio, PoolEIOFlag) { break; } } - t->join(); - 
delete t; + if (t && t->joinable()) { + t->join(); + } // wait for ios to finish for (; !inflight.empty(); ++i) { @@ -2481,8 +2479,8 @@ TEST(LibRadosAio, PoolEIOFlag) { GTEST_SKIP() << "eio flag missed all ios that already completed"; } cout << "max_success " << max_success << ", min_failed " << min_failed << std::endl; + ASSERT_TRUE(min_failed > 0) << "Did not catch any EIO errors"; ASSERT_TRUE(max_success + 1 == min_failed); - my_lock.unlock(); } // This test case reproduces https://tracker.ceph.com/issues/57152