From 45dd90c76cf407c899608bd85028d3323aced57d Mon Sep 17 00:00:00 2001 From: Leonid Usov Date: Mon, 13 May 2024 18:14:32 +0300 Subject: [PATCH] squid: mds/quiesce-db: test that a peer on a newer membership epoch can ack a root Signed-off-by: Leonid Usov (cherry picked from commit f58f63c4aecc867dfe4fd68f04629e8e45f3e864) Fixes: https://tracker.ceph.com/issues/66070 --- src/mds/QuiesceDb.h | 18 ++++++------- src/mds/QuiesceDbManager.cc | 22 +++++++++++++--- src/test/mds/TestQuiesceDb.cc | 48 ++++++++++++++++++++++++++++++++--- 3 files changed, 73 insertions(+), 15 deletions(-) diff --git a/src/mds/QuiesceDb.h b/src/mds/QuiesceDb.h index e95bfcf59e33c..98d98451f6d81 100644 --- a/src/mds/QuiesceDb.h +++ b/src/mds/QuiesceDb.h @@ -54,23 +54,23 @@ operator<<(std::basic_ostream& os, const QuiesceState& qs) { switch (qs) { case QS__INVALID: - return os << "QS__INVALID (" << (int)qs << ")"; + return os << "QS__INVALID"; case QS_QUIESCING: - return os << "QS_QUIESCING (" << (int)qs << ")"; + return os << "QS_QUIESCING"; case QS_QUIESCED: - return os << "QS_QUIESCED (" << (int)qs << ")"; + return os << "QS_QUIESCED"; case QS_RELEASING: - return os << "QS_RELEASING (" << (int)qs << ")"; + return os << "QS_RELEASING"; case QS_RELEASED: - return os << "QS_RELEASED (" << (int)qs << ")"; + return os << "QS_RELEASED"; case QS_FAILED: - return os << "QS_FAILED (" << (int)qs << ")"; + return os << "QS_FAILED"; case QS_CANCELED: - return os << "QS_CANCELED (" << (int)qs << ")"; + return os << "QS_CANCELED"; case QS_TIMEDOUT: - return os << "QS_TIMEDOUT (" << (int)qs << ")"; + return os << "QS_TIMEDOUT"; case QS_EXPIRED: - return os << "QS_EXPIRED (" << (int)qs << ")"; + return os << "QS_EXPIRED"; default: return os << "!Unknown quiesce state! (" << (int)qs << ")"; } diff --git a/src/mds/QuiesceDbManager.cc b/src/mds/QuiesceDbManager.cc index 2c727d7152608..64833934ba1b7 100644 --- a/src/mds/QuiesceDbManager.cc +++ b/src/mds/QuiesceDbManager.cc @@ -436,6 +436,7 @@ void QuiesceDbManager::leader_record_ack(QuiesceInterface::PeerId from, QuiesceM auto it = peers.find(from); if (it == peers.end()) { + dout(5) << "unknown peer " << from << dendl; // ignore updates from unknown peers return; } @@ -443,10 +444,15 @@ void QuiesceDbManager::leader_record_ack(QuiesceInterface::PeerId from, QuiesceM auto & info = it->second; if (diff_map.db_version > db_version()) { - dout(3) << "ignoring unknown version ack by rank " << from << " (" << diff_map.db_version << " > " << db_version() << ")" << dendl; - dout(5) << "will send the peer a full DB" << dendl; - info.diff_map.clear(); + dout(15) << "future version ack by peer " << from << " (" << diff_map.db_version << " > " << db_version() << ")" << dendl; + if (diff_map.db_version.epoch > db_version().epoch && diff_map.db_version.set_version <= db_version().set_version) { + dout(15) << "my epoch is behind, ignoring this until my membership is updated" << dendl; + } else { + dout(5) << "will send the peer a full DB" << dendl; + info.diff_map.clear(); + } } else { + dout(20) << "ack " << diff_map << " from peer " << from << dendl; info.diff_map = std::move(diff_map); info.last_seen = QuiesceClock::now(); } @@ -874,6 +880,7 @@ size_t QuiesceDbManager::check_peer_reports(const QuiesceSetId& set_id, const Qu max_reported_state = QS__INVALID; size_t up_to_date_peers = 0; + std::multimap> reporting_peers; for (auto& [peer, info] : peers) { // we consider the last bit of information we had from a given peer @@ -892,6 +899,7 @@ size_t QuiesceDbManager::check_peer_reports(const QuiesceSetId& set_id, const Qu continue; } reported_state = pr_state.state; + reporting_peers.insert({pr_state.state, {peer, info.diff_map.db_version}}); } // but we only consider the peer up to date given the version @@ -904,10 +912,18 @@ size_t QuiesceDbManager::check_peer_reports(const QuiesceSetId& set_id, const Qu } if (min_reported_state == QS__MAX) { + // this means that we had 0 eligible peer reports min_reported_state = set.get_requested_member_state(); max_reported_state = set.get_requested_member_state(); } + dout(20) << dsetroot("") + << "up_to_date_peers: " << up_to_date_peers + << " min_reported_state: " << min_reported_state + << " max_reported_state: " << max_reported_state + << " peer_acks: " << reporting_peers + << dendl; + return up_to_date_peers; } diff --git a/src/test/mds/TestQuiesceDb.cc b/src/test/mds/TestQuiesceDb.cc index 4f98e5dbab535..ce0ad767edb11 100644 --- a/src/test/mds/TestQuiesceDb.cc +++ b/src/test/mds/TestQuiesceDb.cc @@ -77,9 +77,6 @@ class QuiesceDbTest: public testing::Test { Db& internal_db() { return db; } - QuiesceClusterMembership& internal_membership() { - return membership; - } decltype(pending_requests)& internal_pending_requests() { return pending_requests; } @@ -89,6 +86,11 @@ class QuiesceDbTest: public testing::Test { decltype(peers)& internal_peers() { return peers; } + epoch_t bump_epoch() { + std::lock_guard l(submit_mutex); + submit_condition.notify_all(); + return ++cluster_membership->epoch; + } }; epoch_t epoch = 0; @@ -1617,4 +1619,44 @@ TEST_F(QuiesceDbTest, MultiRankRecovery) ASSERT_EQ(2, last_request->response.sets.size()); EXPECT_EQ(QS_QUIESCING, last_request->response.sets.at("set1").rstate.state); EXPECT_EQ(QS_QUIESCING, last_request->response.sets.at("set2").rstate.state); +} + +/* ========================================= */ +TEST_F(QuiesceDbTest, AckDuringEpochMismatch) +{ + ASSERT_NO_FATAL_FAILURE(configure_cluster({ mds_gid_t(1), mds_gid_t(2) })); + managers.at(mds_gid_t(1))->reset_agent_callback(QUIESCING_AGENT_CB); + + ASSERT_EQ(OK(), run_request([](auto& r) { + r.set_id = "set1"; + r.timeout = sec(60); + r.expiration = sec(60); + r.include_roots({ "root1" }); + })); + + // we are quiescing because manager 2 hasn't yet acknowledged the new state + EXPECT_EQ(QS_QUIESCING, last_request->response.sets.at("set1").rstate.state); + + // imagine that a new epoch has started on the peer before it did for the leader + managers.at(mds_gid_t(2))->bump_epoch(); + + // do the acking while our epoch is higher + { + // wait for the agent to ack root1 as failed + auto did_ack = add_ack_hook([](auto gid, auto const& ack) { + return gid == mds_gid_t(2) && ack.roots.contains("file:/root1") && ack.roots.at("file:/root1").state == QS_QUIESCED; + }); + + // allow acks + managers.at(mds_gid_t(2))->reset_agent_callback(QUIESCING_AGENT_CB); + + EXPECT_EQ(std::future_status::ready, did_ack.wait_for(std::chrono::milliseconds(100))); + } + + // now, bump the epoch on the leader and make sure it quiesces the set + managers.at(mds_gid_t(1))->bump_epoch(); + EXPECT_EQ(OK(), run_request([](auto& r) { + r.set_id = "set1"; + r.await = sec(10); + })); } \ No newline at end of file -- 2.39.5