From cbac8e8d7801c1dc4577db6ef5a9fd74126a9113 Mon Sep 17 00:00:00 2001 From: Leonid Usov Date: Mon, 4 Mar 2024 00:45:07 +0200 Subject: [PATCH] mds/quiesce-db: incorporate review comments Signed-off-by: Leonid Usov (cherry picked from commit 3e012f7ba5b8871d1bcf74d402c58553e18835dd) --- src/mds/FSMap.cc | 7 ++++ src/mds/MDSMap.h | 7 +++- src/mds/MDSRankQuiesce.cc | 12 ++++++ src/mds/QuiesceAgent.cc | 2 +- src/mds/QuiesceDb.h | 4 +- src/mds/QuiesceDbEncoding.h | 61 +++++++++++++++++++++++------ src/mds/QuiesceDbManager.cc | 2 +- src/messages/MMDSQuiesceDbAck.h | 12 ++---- src/messages/MMDSQuiesceDbListing.h | 10 ++--- src/test/mds/TestQuiesceAgent.cc | 6 +-- src/test/mds/TestQuiesceDb.cc | 15 +++++++ 11 files changed, 102 insertions(+), 36 deletions(-) diff --git a/src/mds/FSMap.cc b/src/mds/FSMap.cc index 2f14a780b84..a266ad253af 100644 --- a/src/mds/FSMap.cc +++ b/src/mds/FSMap.cc @@ -13,6 +13,8 @@ */ #include +#include +#include #include "FSMap.h" #include "common/debug.h" @@ -868,6 +870,11 @@ void FSMap::sanity(bool pending) const ceph_assert(info.compat.writeable(fs.mds_map.compat)); } + auto const& leader = fs.mds_map.get_quiesce_db_cluster_leader(); + auto const& members = fs.mds_map.get_quiesce_db_cluster_members(); + ceph_assert(leader == MDS_GID_NONE || members.contains(leader)); + ceph_assert(std::ranges::all_of(members, [&infos = fs.mds_map.mds_info](auto m){return infos.contains(m);})); + for (const auto &j : fs.mds_map.up) { mds_rank_t rank = j.first; ceph_assert(fs.mds_map.in.count(rank) == 1); diff --git a/src/mds/MDSMap.h b/src/mds/MDSMap.h index 2dd8fba8342..c2cb9b009ff 100644 --- a/src/mds/MDSMap.h +++ b/src/mds/MDSMap.h @@ -318,10 +318,15 @@ public: members = qdb_cluster_members; } - mds_gid_t get_quiesce_db_cluster_leader() { + mds_gid_t get_quiesce_db_cluster_leader() const { return qdb_cluster_leader; } + std::unordered_set const& get_quiesce_db_cluster_members() const + { + return qdb_cluster_members; + } + bool update_quiesce_db_cluster(mds_gid_t const& leader, std::same_as> auto && members) { if (leader == qdb_cluster_leader && members == qdb_cluster_members) { return false; diff --git a/src/mds/MDSRankQuiesce.cc b/src/mds/MDSRankQuiesce.cc index 53d82cb1364..004e1536483 100644 --- a/src/mds/MDSRankQuiesce.cc +++ b/src/mds/MDSRankQuiesce.cc @@ -1,3 +1,15 @@ +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2024 IBM, Red Hat + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + #include "MDSRank.h" #include "MDCache.h" diff --git a/src/mds/QuiesceAgent.cc b/src/mds/QuiesceAgent.cc index c1550549ba6..1b5dfe44460 100644 --- a/src/mds/QuiesceAgent.cc +++ b/src/mds/QuiesceAgent.cc @@ -182,7 +182,7 @@ void* QuiesceAgent::agent_thread_main() { dout(3) << "got error: " << rc << " trying to send " << ack << dendl; } } - ack.reset(); + ack.clear(); lock.lock(); diff --git a/src/mds/QuiesceDb.h b/src/mds/QuiesceDb.h index 8fccc43d448..e95bfcf59e3 100644 --- a/src/mds/QuiesceDb.h +++ b/src/mds/QuiesceDb.h @@ -464,7 +464,7 @@ struct QuiesceDbRequest { config(*this); } - void reset() { + void clear() { reset([](auto&r){}); } @@ -644,7 +644,7 @@ struct QuiesceMap { }; using Roots = std::unordered_map; Roots roots; - void reset() { + void clear() { db_version = {0, 0}; roots.clear(); } diff --git a/src/mds/QuiesceDbEncoding.h b/src/mds/QuiesceDbEncoding.h index ff2167c5d40..c76ed2d0c52 100644 --- a/src/mds/QuiesceDbEncoding.h +++ b/src/mds/QuiesceDbEncoding.h @@ -15,21 +15,20 @@ #include "include/encoding.h" #include -struct QuiesceDbEncoding { - static constexpr int version = 1; - static constexpr int compat = 1; -}; - void encode(QuiesceDbVersion const& v, bufferlist& bl, uint64_t features = 0) { + ENCODE_START(1, 1, bl); encode(v.epoch, bl, features); encode(v.set_version, bl, features); + ENCODE_FINISH(bl); } void decode(QuiesceDbVersion& v, bufferlist::const_iterator& p) { + DECODE_START(1, p); decode(v.epoch, p); decode(v.set_version, p); + DECODE_FINISH(p); } void encode(QuiesceState const & state, bufferlist& bl, uint64_t features=0) @@ -60,48 +59,61 @@ void decode(QuiesceTimeInterval & interval, bufferlist::const_iterator& p) void encode(RecordedQuiesceState const& rstate, bufferlist& bl, uint64_t features = 0) { + ENCODE_START(1, 1, bl); encode(rstate.state, bl, features); encode(rstate.at_age.count(), bl, features); + ENCODE_FINISH(bl); } void decode(RecordedQuiesceState& rstate, bufferlist::const_iterator& p) { + DECODE_START(1, p); decode(rstate.state, p); decode(rstate.at_age, p); + DECODE_FINISH(p); } void encode(QuiesceSet::MemberInfo const& member, bufferlist& bl, uint64_t features = 0) { + ENCODE_START(1, 1, bl); encode(member.rstate, bl, features); encode(member.excluded, bl, features); + ENCODE_FINISH(bl); } void decode(QuiesceSet::MemberInfo& member, bufferlist::const_iterator& p) { + DECODE_START(1, p); decode(member.rstate, p); decode(member.excluded, p); + DECODE_FINISH(p); } void encode(QuiesceSet const& set, bufferlist& bl, uint64_t features = 0) { + ENCODE_START(1, 1, bl); encode(set.version, bl, features); encode(set.rstate, bl, features); - encode(set.timeout, bl, features); - encode(set.expiration, bl, features); + ::encode(set.timeout, bl, features); + ::encode(set.expiration, bl, features); encode(set.members, bl, features); + ENCODE_FINISH(bl); } void decode(QuiesceSet& set, bufferlist::const_iterator& p) { + DECODE_START(1, p); decode(set.version, p); decode(set.rstate, p); - decode(set.timeout, p); - decode(set.expiration, p); + ::decode(set.timeout, p); + ::decode(set.expiration, p); decode(set.members, p); + DECODE_FINISH(p); } void encode(QuiesceDbRequest const& req, bufferlist& bl, uint64_t features = 0) { + ENCODE_START(1, 1, bl); encode(req.control.raw, bl, features); encode(req.set_id, bl); encode(req.if_version, bl); @@ -109,10 +121,12 @@ void encode(QuiesceDbRequest const& req, bufferlist& bl, uint64_t features = 0) encode(req.expiration, bl); encode(req.await, bl); encode(req.roots, bl); + ENCODE_FINISH(bl); } void decode(QuiesceDbRequest& req, bufferlist::const_iterator& p) { + DECODE_START(1, p); decode(req.control.raw, p); decode(req.set_id, p); decode(req.if_version, p); @@ -120,66 +134,87 @@ void decode(QuiesceDbRequest& req, bufferlist::const_iterator& p) decode(req.expiration, p); decode(req.await, p); decode(req.roots, p); + DECODE_FINISH(p); } void encode(QuiesceDbListing const& listing, bufferlist& bl, uint64_t features = 0) { + ENCODE_START(1, 1, bl); encode(listing.db_version, bl, features); - encode(listing.db_age, bl, features); + ::encode(listing.db_age, bl, features); encode(listing.sets, bl, features); + ENCODE_FINISH(bl); } void decode(QuiesceDbListing& listing, bufferlist::const_iterator& p) { + DECODE_START(1, p); decode(listing.db_version, p); - decode(listing.db_age, p); + ::decode(listing.db_age, p); decode(listing.sets, p); + DECODE_FINISH(p); } void encode(QuiesceDbPeerListing const& listing, bufferlist& bl, uint64_t features = 0) { + ENCODE_START(1, 1, bl); encode(listing.origin, bl, features); encode(listing.db, bl, features); + ENCODE_FINISH(bl); } void decode(QuiesceDbPeerListing& listing, bufferlist::const_iterator& p) { + DECODE_START(1, p); decode(listing.origin, p); decode(listing.db, p); + DECODE_FINISH(p); } void encode(QuiesceMap::RootInfo const& root, bufferlist& bl, uint64_t features = 0) { + ENCODE_START(1, 1, bl); encode(root.state, bl, features); - encode(root.ttl, bl, features); + ::encode(root.ttl, bl, features); + ENCODE_FINISH(bl); } void decode(QuiesceMap::RootInfo& root, bufferlist::const_iterator& p) { + DECODE_START(1, p); decode(root.state, p); - decode(root.ttl, p); + ::decode(root.ttl, p); + DECODE_FINISH(p); } void encode(QuiesceMap const& map, bufferlist& bl, uint64_t features = 0) { + ENCODE_START(1, 1, bl); encode(map.db_version, bl, features); encode(map.roots, bl, features); + ENCODE_FINISH(bl); } void decode(QuiesceMap& map, bufferlist::const_iterator& p) { + DECODE_START(1, p); decode(map.db_version, p); decode(map.roots, p); + DECODE_FINISH(p); } void encode(QuiesceDbPeerAck const& ack, bufferlist& bl, uint64_t features = 0) { + ENCODE_START(1, 1, bl); encode(ack.origin, bl, features); encode(ack.diff_map, bl, features); + ENCODE_FINISH(bl); } void decode(QuiesceDbPeerAck& ack, bufferlist::const_iterator& p) { + DECODE_START(1, p); decode(ack.origin, p); decode(ack.diff_map, p); + DECODE_FINISH(p); } diff --git a/src/mds/QuiesceDbManager.cc b/src/mds/QuiesceDbManager.cc index ff497aafe45..ca69fef73fd 100644 --- a/src/mds/QuiesceDbManager.cc +++ b/src/mds/QuiesceDbManager.cc @@ -421,7 +421,7 @@ void QuiesceDbManager::leader_record_ack(QuiesceInterface::PeerId from, QuiesceM if (diff_map.db_version > db_version()) { dout(3) << "ignoring unknown version ack by rank " << from << " (" << diff_map.db_version << " > " << db_version() << ")" << dendl; dout(5) << "will send the peer a full DB" << dendl; - info.diff_map.reset(); + info.diff_map.clear(); } else { info.diff_map = std::move(diff_map); info.last_seen = QuiesceClock::now(); diff --git a/src/messages/MMDSQuiesceDbAck.h b/src/messages/MMDSQuiesceDbAck.h index b68445dfb09..48bc5e37a86 100644 --- a/src/messages/MMDSQuiesceDbAck.h +++ b/src/messages/MMDSQuiesceDbAck.h @@ -3,12 +3,12 @@ /* * Ceph - scalable distributed file system * - * Copyright (C) 2004-2006 Sage Weil + * Copyright (C) 2024 IBM, Red Hat * * This is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License version 2.1, as published by the Free Software - * Foundation. See file COPYING. + * Foundation. See file COPYING. * */ @@ -36,9 +36,7 @@ public: void encode_payload_from(QuiesceDbPeerAck const& ack) { - ENCODE_START(QuiesceDbEncoding::version, QuiesceDbEncoding::compat, payload); - encode(ack, payload); - ENCODE_FINISH(payload); + ::encode(ack, payload); } void decode_payload() override { @@ -48,9 +46,7 @@ public: void decode_payload_into(QuiesceDbPeerAck &ack) const { auto p = payload.cbegin(); - DECODE_START(QuiesceDbEncoding::version, p); - decode(ack, p); - DECODE_FINISH(p); + ::decode(ack, p); } private: diff --git a/src/messages/MMDSQuiesceDbListing.h b/src/messages/MMDSQuiesceDbListing.h index 5fd068adb56..39d72fb8eb4 100644 --- a/src/messages/MMDSQuiesceDbListing.h +++ b/src/messages/MMDSQuiesceDbListing.h @@ -3,7 +3,7 @@ /* * Ceph - scalable distributed file system * - * Copyright (C) 2004-2006 Sage Weil + * Copyright (C) 2024 IBM, Red Hat * * This is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -35,9 +35,7 @@ public: void encode_payload_from(QuiesceDbPeerListing const& peer_listing) { - ENCODE_START(QuiesceDbEncoding::version, QuiesceDbEncoding::compat, payload); - encode(peer_listing, payload); - ENCODE_FINISH(payload); + ::encode(peer_listing, payload); } void decode_payload() override { @@ -47,9 +45,7 @@ public: void decode_payload_into(QuiesceDbPeerListing &peer_listing) const { auto p = payload.cbegin(); - DECODE_START(QuiesceDbEncoding::version, p); - decode(peer_listing, p); - DECODE_FINISH(p); + ::decode(peer_listing, p); } private: diff --git a/src/test/mds/TestQuiesceAgent.cc b/src/test/mds/TestQuiesceAgent.cc index d7d526ae9a6..ae951158954 100644 --- a/src/test/mds/TestQuiesceAgent.cc +++ b/src/test/mds/TestQuiesceAgent.cc @@ -333,7 +333,7 @@ TEST_F(QuiesceAgentTest, QuiesceProtocol) { EXPECT_EQ(1, latest_ack.roots.size()); EXPECT_EQ(QS_QUIESCED, latest_ack.roots.at("root1").state); - latest_ack.reset(); + latest_ack.clear(); // complete the other root with failure EXPECT_TRUE(complete_quiesce("root2", -1)); @@ -344,7 +344,7 @@ TEST_F(QuiesceAgentTest, QuiesceProtocol) { EXPECT_EQ(QS_QUIESCED, latest_ack.roots.at("root1").state); EXPECT_EQ(QS_FAILED, latest_ack.roots.at("root2").state); - latest_ack.reset(); + latest_ack.clear(); // complete the third root with success // complete one root with success @@ -457,7 +457,7 @@ TEST_F(QuiesceAgentTest, DuplicateQuiesceRequest) { EXPECT_TRUE(quiesce_requests.contains("root1")); EXPECT_TRUE(quiesce_requests.contains("root2")); - latest_ack.reset(); + latest_ack.clear(); // now, bring the roots back { auto ack = update(3, { diff --git a/src/test/mds/TestQuiesceDb.cc b/src/test/mds/TestQuiesceDb.cc index 8cd168424e5..f930f6c042a 100644 --- a/src/test/mds/TestQuiesceDb.cc +++ b/src/test/mds/TestQuiesceDb.cc @@ -10,6 +10,7 @@ * */ #include "mds/QuiesceDbManager.h" +#include "mds/QuiesceDbEncoding.h" #include "gtest/gtest.h" #include "common/Cond.h" #include @@ -151,6 +152,13 @@ class QuiesceDbTest: public testing::Test { if (epoch == this->epoch) { if (this->managers.contains(recipient)) { dout(10) << "listing from " << me << " (leader=" << leader << ") to " << recipient << " for version " << listing.db_version << " with " << listing.sets.size() << " sets" << dendl; + + ceph::bufferlist bl; + encode(listing, bl); + listing.clear(); + auto p = bl.cbegin(); + decode(listing, p); + this->managers[recipient]->submit_peer_listing({me, std::move(listing)}); comms_cond.notify_all(); return 0; @@ -181,6 +189,13 @@ class QuiesceDbTest: public testing::Test { it++; } } + + ceph::bufferlist bl; + encode(diff_map, bl); + diff_map.clear(); + auto p = bl.cbegin(); + decode(diff_map, p); + this->managers[leader]->submit_peer_ack({me, std::move(diff_map)}); comms_cond.notify_all(); l.unlock(); -- 2.39.5