From d95a65678f26c6088832707f4a07ec55baf2d949 Mon Sep 17 00:00:00 2001 From: Leonid Usov Date: Sat, 25 May 2024 16:35:31 +0300 Subject: [PATCH] squid: mds/quiesce-agt: add test for a rapid async ack In this scenario, the agent thread is able to run and generate an ack before the db_update call returns to the caller. Fixes: https://tracker.ceph.com/issues/66219 Signed-off-by: Leonid Usov (cherry picked from commit 4ab40ea0d3a366e3e2cb7bd7da8da9463b27eb25) Fixes: https://tracker.ceph.com/issues/66256 --- src/mds/QuiesceAgent.h | 2 +- src/test/mds/TestQuiesceAgent.cc | 128 ++++++++++++++++++++++--------- 2 files changed, 93 insertions(+), 37 deletions(-) diff --git a/src/mds/QuiesceAgent.h b/src/mds/QuiesceAgent.h index 57460ee946a..5c07d6d8074 100644 --- a/src/mds/QuiesceAgent.h +++ b/src/mds/QuiesceAgent.h @@ -237,7 +237,7 @@ class QuiesceAgent { return std::max(current.db_version, pending.db_version); } - void set_pending_roots(QuiesceDbVersion db_version, TrackedRoots&& new_roots); + virtual void set_pending_roots(QuiesceDbVersion db_version, TrackedRoots&& new_roots); void set_upkeep_needed(); diff --git a/src/test/mds/TestQuiesceAgent.cc b/src/test/mds/TestQuiesceAgent.cc index d32d48b9675..d16b058f0bf 100644 --- a/src/test/mds/TestQuiesceAgent.cc +++ b/src/test/mds/TestQuiesceAgent.cc @@ -10,15 +10,21 @@ * */ #include "common/Cond.h" +#include "common/debug.h" #include "mds/QuiesceAgent.h" #include "gtest/gtest.h" #include #include +#include #include #include #include #include -#include + +#define dout_context g_ceph_context +#define dout_subsys ceph_subsys_mds_quiesce +#undef dout_prefix +#define dout_prefix *_dout << "== test == " class QuiesceAgentTest : public testing::Test { using RequestHandle = QuiesceInterface::RequestHandle; @@ -70,8 +76,24 @@ class QuiesceAgentTest : public testing::Test { (*f)(pending, current); } } + + bool wait_for_agent_in_set_roots = false; + void set_pending_roots(QuiesceDbVersion db_version, TrackedRoots&& new_roots) override { + // just like the original version, + // but allows to simulate a case when the context + // switches to the agent thread and processes the new version + // before the calling has the chance to continue + + QuiesceAgent::set_pending_roots(db_version, std::move(new_roots)); + + while(wait_for_agent_in_set_roots && db_version != await_idle()) { + dout(3) << __func__ << ": awaiting agent on version " << db_version << dendl; + } + } + + ControlInterface& get_control_interface() { return quiesce_control; } }; - QuiesceMap latest_ack; + QuiesceMap async_ack; std::unordered_map quiesce_requests; ceph_tid_t last_tid; std::mutex mutex; @@ -134,7 +156,7 @@ class QuiesceAgentTest : public testing::Test { ci.agent_ack = [this](QuiesceMap const& update) { std::lock_guard l(mutex); - latest_ack = update; + async_ack = update; return 0; }; @@ -151,8 +173,9 @@ class QuiesceAgentTest : public testing::Test { using R = QuiesceMap::Roots::value_type; using RootInitList = std::initializer_list; + enum struct WaitForAgent { IfAsync, No }; - std::optional update(QuiesceDbVersion v, RootInitList roots) + std::optional update(QuiesceDbVersion v, RootInitList roots, WaitForAgent wait = WaitForAgent::IfAsync) { QuiesceMap map(v, QuiesceMap::Roots { roots }); @@ -160,17 +183,22 @@ class QuiesceAgentTest : public testing::Test { return map; } - return std::nullopt; + if (WaitForAgent::No == wait) { + return std::nullopt; + } else { + assert(await_idle_v(v.set_version)); + return async_ack; + } } - std::optional update(QuiesceSetVersion v, RootInitList roots) + std::optional update(QuiesceSetVersion v, RootInitList roots, WaitForAgent wait = WaitForAgent::IfAsync) { - return update(QuiesceDbVersion { 1, v }, roots); + return update(QuiesceDbVersion { 1, v }, roots, wait); } - std::optional update(RootInitList roots) + std::optional update(RootInitList roots, WaitForAgent wait = WaitForAgent::IfAsync) { - return update(agent->get_latest_version() + 1, roots); + return update({1, agent->get_latest_version().set_version + 1}, roots, wait); } template > @@ -332,22 +360,22 @@ TEST_F(QuiesceAgentTest, QuiesceProtocol) { EXPECT_TRUE(await_idle()); // we should have seen an ack sent - EXPECT_EQ(1, latest_ack.db_version); - EXPECT_EQ(1, latest_ack.roots.size()); - EXPECT_EQ(QS_QUIESCED, latest_ack.roots.at("root1").state); + EXPECT_EQ(1, async_ack.db_version); + EXPECT_EQ(1, async_ack.roots.size()); + EXPECT_EQ(QS_QUIESCED, async_ack.roots.at("root1").state); - latest_ack.clear(); + async_ack.clear(); // complete the other root with failure EXPECT_TRUE(complete_quiesce("root2", -1)); EXPECT_TRUE(await_idle()); - EXPECT_EQ(1, latest_ack.db_version); - ASSERT_EQ(2, latest_ack.roots.size()); - EXPECT_EQ(QS_QUIESCED, latest_ack.roots.at("root1").state); - EXPECT_EQ(QS_FAILED, latest_ack.roots.at("root2").state); + EXPECT_EQ(1, async_ack.db_version); + ASSERT_EQ(2, async_ack.roots.size()); + EXPECT_EQ(QS_QUIESCED, async_ack.roots.at("root1").state); + EXPECT_EQ(QS_FAILED, async_ack.roots.at("root2").state); - latest_ack.clear(); + async_ack.clear(); // complete the third root with success // complete one root with success @@ -356,11 +384,11 @@ TEST_F(QuiesceAgentTest, QuiesceProtocol) { EXPECT_TRUE(await_idle()); // we should see the two quiesced roots in the ack, - EXPECT_EQ(1, latest_ack.db_version); - ASSERT_EQ(3, latest_ack.roots.size()); - EXPECT_EQ(QS_QUIESCED, latest_ack.roots.at("root1").state); - EXPECT_EQ(QS_FAILED, latest_ack.roots.at("root2").state); - EXPECT_EQ(QS_QUIESCED, latest_ack.roots.at("root3").state); + EXPECT_EQ(1, async_ack.db_version); + ASSERT_EQ(3, async_ack.roots.size()); + EXPECT_EQ(QS_QUIESCED, async_ack.roots.at("root1").state); + EXPECT_EQ(QS_FAILED, async_ack.roots.at("root2").state); + EXPECT_EQ(QS_QUIESCED, async_ack.roots.at("root3").state); { auto ack = update(2, { @@ -374,7 +402,7 @@ TEST_F(QuiesceAgentTest, QuiesceProtocol) { // root2 is still quiescing, no updates for it // root3 is released asyncrhonously so for now it should be QUIESCED ASSERT_EQ(2, ack->roots.size()); - EXPECT_EQ(QS_FAILED, latest_ack.roots.at("root2").state); + EXPECT_EQ(QS_FAILED, async_ack.roots.at("root2").state); EXPECT_EQ(QS_QUIESCED, ack->roots.at("root3").state); } @@ -460,7 +488,7 @@ TEST_F(QuiesceAgentTest, DuplicateQuiesceRequest) { EXPECT_TRUE(quiesce_requests.contains("root1")); EXPECT_TRUE(quiesce_requests.contains("root2")); - latest_ack.clear(); + async_ack.clear(); // now, bring the roots back { auto ack = update(3, { @@ -480,10 +508,10 @@ TEST_F(QuiesceAgentTest, DuplicateQuiesceRequest) { // root1 and root2 are still registered internally // so it should result in a failure to quiesce them again - EXPECT_EQ(3, latest_ack.db_version); - EXPECT_EQ(2, latest_ack.roots.size()); - EXPECT_EQ(QS_FAILED, latest_ack.roots.at("root1").state); - EXPECT_EQ(QS_FAILED, latest_ack.roots.at("root2").state); + EXPECT_EQ(3, async_ack.db_version); + EXPECT_EQ(2, async_ack.roots.size()); + EXPECT_EQ(QS_FAILED, async_ack.roots.at("root1").state); + EXPECT_EQ(QS_FAILED, async_ack.roots.at("root2").state); // the actual state of the pinned objects shouldn't have changed EXPECT_EQ(QS_QUIESCED, pinned1->get_actual_state()); @@ -502,11 +530,11 @@ TEST_F(QuiesceAgentTest, DuplicateQuiesceRequest) { EXPECT_TRUE(complete_quiesce("root3")); EXPECT_TRUE(await_idle()); - EXPECT_EQ(3, latest_ack.db_version); - EXPECT_EQ(3, latest_ack.roots.size()); - EXPECT_EQ(QS_FAILED, latest_ack.roots.at("root1").state); - EXPECT_EQ(QS_FAILED, latest_ack.roots.at("root2").state); - EXPECT_EQ(QS_QUIESCED, latest_ack.roots.at("root3").state); + EXPECT_EQ(3, async_ack.db_version); + EXPECT_EQ(3, async_ack.roots.size()); + EXPECT_EQ(QS_FAILED, async_ack.roots.at("root1").state); + EXPECT_EQ(QS_FAILED, async_ack.roots.at("root2").state); + EXPECT_EQ(QS_QUIESCED, async_ack.roots.at("root3").state); } TEST_F(QuiesceAgentTest, TimeoutBeforeComplete) @@ -588,8 +616,8 @@ TEST_F(QuiesceAgentTest, RapidDbUpdates) // nothing should be in the ack // if we incorrectly submit root1 twice // then it should be repored here as FAILED - EXPECT_EQ(2, latest_ack.db_version); - EXPECT_EQ(0, latest_ack.roots.size()); + EXPECT_EQ(2, async_ack.db_version); + EXPECT_EQ(0, async_ack.roots.size()); { auto tracked = agent->tracked_roots(); @@ -597,3 +625,31 @@ TEST_F(QuiesceAgentTest, RapidDbUpdates) } } +TEST_F(QuiesceAgentTest, RapidAsyncAck) +{ + // This validates that if the agent thread manages to + // process a db update and generate a QUIESCED ack + // before the updating thread gets the CPU to progress, + // then the outdated synchronous ack is not sent + + agent->wait_for_agent_in_set_roots = true; + + // make the agent complete the request synchronosuly with the submit + auto && old_submit = agent->get_control_interface().submit_request; + agent->get_control_interface().submit_request = [this, old_submit = std::move(old_submit)](QuiesceRoot root, Context* ctx) { + auto result = old_submit(root, ctx); + dout(10) << "quiescing the root `" << root << "` in submit" << dendl; + complete_quiesce(root, 0); + return result; + }; + + auto ack = update(1, { + { "root1", QS_QUIESCING }, + }); + + auto && latest_ack = ack.value_or(async_ack); + + EXPECT_EQ(1, latest_ack.db_version); + ASSERT_EQ(1, latest_ack.roots.size()); + EXPECT_EQ(QS_QUIESCED, latest_ack.roots.at("root1").state); +} -- 2.39.5