From 9a4c5853d1c2a353f72cd6358bbdedd93c4cc209 Mon Sep 17 00:00:00 2001 From: Leonid Usov Date: Sun, 26 May 2024 11:16:48 +0300 Subject: [PATCH] mds/quiesce-agt: never send a synchronous ack Defer to the agent thread to perform all acking. This avoids race conditions between the updating thread and the acking thread. Fixes: https://tracker.ceph.com/issues/66219 Signed-off-by: Leonid Usov --- src/mds/QuiesceAgent.cc | 5 +++-- src/test/mds/TestQuiesceAgent.cc | 36 ++++++++++++++------------------ 2 files changed, 19 insertions(+), 22 deletions(-) diff --git a/src/mds/QuiesceAgent.cc b/src/mds/QuiesceAgent.cc index 6bc6770e31046..3a01910cb264b 100644 --- a/src/mds/QuiesceAgent.cc +++ b/src/mds/QuiesceAgent.cc @@ -82,8 +82,9 @@ bool QuiesceAgent::db_update(QuiesceMap& map) // ack with the known state stored in `map` set_pending_roots(map.db_version, std::move(new_roots)); - // always send a synchronous ack - return true; + // to avoid ack races with the agent_thread, + // never send a synchronous ack + return false; } void* QuiesceAgent::agent_thread_main() { diff --git a/src/test/mds/TestQuiesceAgent.cc b/src/test/mds/TestQuiesceAgent.cc index 130d319403ab4..aa108075faa59 100644 --- a/src/test/mds/TestQuiesceAgent.cc +++ b/src/test/mds/TestQuiesceAgent.cc @@ -278,16 +278,19 @@ TEST_F(QuiesceAgentTest, DbUpdates) { // manipulate root0 and root1 as if they were quiesced and root2 as if it was released auto& root0 = *roots.at("root0"); - root0.quiesce_result = 0; - EXPECT_EQ(QS_QUIESCED, root0.get_actual_state()); + complete_quiesce("root0", 0); auto& root1 = *roots.at("root1"); - root1.quiesce_result = 0; - EXPECT_EQ(QS_QUIESCED, root1.get_actual_state()); + complete_quiesce("root1", 0); auto& root2 = *roots.at("root2"); - root2.quiesce_result = 0; - root2.cancel_result = 0; + complete_quiesce("root2", 0); + root2.cancel_result = root2.cancel(*root2.quiesce_request); + + EXPECT_TRUE(await_idle()); + + EXPECT_EQ(QS_QUIESCED, root0.get_actual_state()); + EXPECT_EQ(QS_QUIESCED, root1.get_actual_state()); EXPECT_EQ(QS_RELEASED, root2.get_actual_state()); } @@ -501,13 +504,10 @@ TEST_F(QuiesceAgentTest, DuplicateQuiesceRequest) { { "root1", QS_QUIESCING }, { "root2", QS_QUIESCING }, { "root3", QS_QUIESCING }, - }); + }, WaitForAgent::No); - ASSERT_TRUE(ack.has_value()); - EXPECT_EQ(3, ack->db_version); - // even though root1 is already quiesced, - // we should not know about it synchronously - EXPECT_EQ(0, ack->roots.size()); + // no sync update + EXPECT_FALSE(ack.has_value()); } EXPECT_TRUE(await_idle()); @@ -600,21 +600,17 @@ TEST_F(QuiesceAgentTest, RapidDbUpdates) auto ack = update(2, { { "root1", QS_QUIESCING }, { "root2", QS_QUIESCING }, - }); + }, WaitForAgent::No); - ASSERT_TRUE(ack.has_value()); - EXPECT_EQ(2, ack->db_version); - EXPECT_EQ(0, ack->roots.size()); + EXPECT_FALSE(ack.has_value()); }; { auto ack = update(1, { { "root1", QS_QUIESCING }, - }); + }, WaitForAgent::No); - ASSERT_TRUE(ack.has_value()); - EXPECT_EQ(1, ack->db_version); - EXPECT_EQ(0, ack->roots.size()); + EXPECT_FALSE(ack.has_value()); } EXPECT_TRUE(await_idle_v(2)); -- 2.39.5