git.apps.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
mds/quiesce: resolve the quiesce cluster at the mds monitor
author Leonid Usov <leonid.usov@ibm.com>
Wed, 28 Feb 2024 13:11:17 +0000 (15:11 +0200)
committer Leonid Usov <leonid.usov@ibm.com>
Mon, 4 Mar 2024 13:13:23 +0000 (15:13 +0200)
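
Instead of having every MDS rank (and the mgr) independently derive the
quiesce db cluster from its own view of the MDSMap, resolve it once at the
MDSMonitor when the pending FSMap is encoded, and persist the result in the
MDSMap (encoding version 19). The leader and the members are exposed in the
map dump as "qdb_leader" and "qdb_cluster", so MDSRank::quiesce_cluster_update()
and the mgr-side helpers can simply read them. A non-leader mds now fails
quiesce db commands with ENOTTY (rather than EPERM) and reports the current
leader in the response body, letting the mgr retry against the correct gid.
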
Signed-off-by: Leonid Usov <leonid.usov@ibm.com>
13 files changed:
qa/tasks/mgr/dashboard/test_health.py
src/include/cephfs/types.h
src/mds/FSMap.h
src/mds/MDSMap.cc
src/mds/MDSMap.h
src/mds/MDSRankQuiesce.cc
src/mds/QuiesceDbManager.h
src/mon/MDSMonitor.cc
src/mon/MDSMonitor.h
src/pybind/mgr/mgr_module.py
src/pybind/mgr/volumes/fs/volume.py
src/pybind/mgr/volumes/module.py
src/test/mds/TestQuiesceDb.cc

index 0b7b7a3b449c1923980843889af1ac4ee67e40f1..3ebea97e3d29eec69b77c2a6053a2c81db4b3354 100644 (file)
@@ -63,7 +63,9 @@ class HealthTest(DashboardTestCase):
             'balance_automate': bool,
         }),
         'ever_allowed_features': int,
-        'root': int
+        'root': int,
+        'qdb_leader': int,
+        'qdb_cluster': JList(int)
     })
 
     def test_minimal_health(self):
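
For context, a minimal sketch of how the two new fields surface in the mdsmap
JSON that this schema validates; the gid values below are invented for
illustration:

    # Hypothetical excerpt of the "mdsmap" object checked above; the keys
    # come from this patch, the gids are made up.
    mdsmap_excerpt = {
        'ever_allowed_features': 32,
        'root': 0,
        'qdb_leader': 24805,            # gid of the quiesce db cluster leader
        'qdb_cluster': [24805, 24811],  # gids of all cluster members
    }
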
index 068c9ef81995df9c9a276e2d60d9f9e0d2313dc4..23f6e44a9c2b37a6e9252b8a7ca7275ce5110d9e 100644 (file)
 BOOST_STRONG_TYPEDEF(uint64_t, mds_gid_t)
 extern const mds_gid_t MDS_GID_NONE;
 
+template <>
+struct std::hash<mds_gid_t> {
+  size_t operator()(const mds_gid_t& gid) const
+  {
+    return hash<uint64_t> {}(gid);
+  }
+};
+
 typedef int32_t fs_cluster_id_t;
 constexpr fs_cluster_id_t FS_CLUSTER_ID_NONE = -1;
 
index a6aa92f218be76a3629e45bfee115eb04580d2e9..e2046fb7af862e6b257018e65d766021d76c788f 100644 (file)
@@ -21,6 +21,7 @@
 #include <set>
 #include <string>
 #include <string_view>
+#include <type_traits>
 
 #include <errno.h>
 
@@ -479,9 +480,18 @@ public:
   void modify_filesystem(fs_cluster_id_t fscid, T&& fn)
   {
     auto& fs = filesystems.at(fscid);
-    fn(fs);
-    fs.mds_map.epoch = epoch;
-    fs.mds_map.modified = ceph_clock_now();
+    bool did_update = true;
+
+    if constexpr (std::is_convertible_v<std::invoke_result_t<T, Filesystem&>, bool>) {
+      did_update = fn(fs);
+    } else {
+      fn(fs);
+    }
+    
+    if (did_update) {
+      fs.mds_map.epoch = epoch;
+      fs.mds_map.modified = ceph_clock_now();
+    }
   }
 
 /* This method is written for the "ceph fs swap" command
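
With this change, a callback passed to modify_filesystem() may return a bool
reporting whether it actually modified the filesystem; epoch and modified are
bumped only in that case, while void callbacks keep the old unconditional
behavior. A minimal Python model of the dispatch, for illustration only:

    def modify_filesystem(fs, fn):
        # Mirror of the if-constexpr dispatch above: a callback returning
        # None keeps the legacy always-touch behavior, while an explicit
        # False suppresses the epoch/modified bump.
        did_update = fn(fs)
        if did_update is None or did_update:
            fs['epoch'] += 1

    fs = {'epoch': 10}
    modify_filesystem(fs, lambda fs: None)   # legacy callback: epoch -> 11
    modify_filesystem(fs, lambda fs: False)  # reports "no change": stays 11
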
index 4096b23ab9d8b6c9633f4bddf8df7a2796ffebb8..bedeed165ab929613f69a4726e0ec37ec1c8f7a7 100644 (file)
@@ -228,6 +228,12 @@ void MDSMap::dump(Formatter *f) const
   f->dump_string("balancer", balancer);
   f->dump_string("bal_rank_mask", bal_rank_mask);
   f->dump_int("standby_count_wanted", std::max(0, standby_count_wanted));
+  f->dump_unsigned("qdb_leader", qdb_cluster_leader);
+  f->open_array_section("qdb_cluster");
+  for (auto m: qdb_cluster_members) {
+    f->dump_int("member", m);
+  }
+  f->close_section();
 }
 
 void MDSMap::dump_flags_state(Formatter *f) const
@@ -290,6 +296,7 @@ void MDSMap::print(ostream& out) const
   out << "balancer\t" << balancer << "\n";
   out << "bal_rank_mask\t" << bal_rank_mask << "\n";
   out << "standby_count_wanted\t" << std::max(0, standby_count_wanted) << "\n";
+  out << "qdb_cluster\tleader: " << qdb_cluster_leader << " members: " << qdb_cluster_members << std::endl;
 
   multimap< pair<mds_rank_t, unsigned>, mds_gid_t > foo;
   for (const auto &p : mds_info) {
@@ -773,7 +780,7 @@ void MDSMap::encode(bufferlist& bl, uint64_t features) const
   encode(data_pools, bl);
   encode(cas_pool, bl);
 
-  __u16 ev = 18;
+  __u16 ev = 19;
   encode(ev, bl);
   encode(compat, bl);
   encode(metadata_pool, bl);
@@ -802,6 +809,8 @@ void MDSMap::encode(bufferlist& bl, uint64_t features) const
   encode(required_client_features, bl);
   encode(bal_rank_mask, bl);
   encode(max_xattr_size, bl);
+  encode(qdb_cluster_leader, bl);
+  encode(qdb_cluster_members, bl);
   ENCODE_FINISH(bl);
 }
 
@@ -957,6 +966,11 @@ void MDSMap::decode(bufferlist::const_iterator& p)
     decode(max_xattr_size, p);
   }
 
+  if (ev >= 19) {
+    decode(qdb_cluster_leader, p);
+    decode(qdb_cluster_members, p);
+  }
+
   /* All MDS since at least v14.0.0 understand INLINE */
   /* TODO: remove after R is released */
   compat.incompat.insert(MDS_FEATURE_INCOMPAT_INLINE);
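
The guard above is the usual pattern for extending a versioned encoding:
encoders now stamp ev 19 and append the two fields, while decoders reading
older payloads keep the defaults (MDS_GID_NONE, empty member set). A
self-contained Python model of the idea; the wire layout shown is an
assumption for illustration, not Ceph's actual encoding:

    import struct

    def decode_tail(buf, ev):
        # Fields appended in ev 19 exist only if the encoder claimed >= 19;
        # otherwise keep the defaults (0 stands in for MDS_GID_NONE).
        qdb_leader, qdb_members = 0, set()
        if ev >= 19:
            (qdb_leader,) = struct.unpack('<Q', buf.read(8))
            (count,) = struct.unpack('<I', buf.read(4))
            qdb_members = {struct.unpack('<Q', buf.read(8))[0]
                           for _ in range(count)}
        return qdb_leader, qdb_members
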
index 746ae8597151bc90a28eae43579c69c73fa44209..2dd8fba8342343f2941fa3621a8f6e147709a951 100644 (file)
@@ -19,6 +19,7 @@
 #include <map>
 #include <set>
 #include <string>
+#include <ranges>
 #include <string_view>
 
 #include <errno.h>
@@ -312,6 +313,29 @@ public:
   mds_rank_t get_tableserver() const { return tableserver; }
   mds_rank_t get_root() const { return root; }
 
+  void get_quiesce_db_cluster(mds_gid_t &leader, std::unordered_set<mds_gid_t> &members) const {
+    leader = qdb_cluster_leader;
+    members = qdb_cluster_members; 
+  }
+
+  mds_gid_t get_quiesce_db_cluster_leader() {
+    return qdb_cluster_leader;
+  }
+
+  bool update_quiesce_db_cluster(mds_gid_t const& leader, std::same_as<std::unordered_set<mds_gid_t>> auto && members) {
+    if (leader == qdb_cluster_leader && members == qdb_cluster_members) {
+      return false;
+    }
+
+    ceph_assert(leader == MDS_GID_NONE || mds_info.contains(leader));
+    ceph_assert(std::ranges::all_of(members, [this](auto &m) {return mds_info.contains(m);}));
+
+    qdb_cluster_leader = leader;
+    qdb_cluster_members = members;
+
+    return true;
+  }
+
   const std::vector<int64_t> &get_data_pools() const { return data_pools; }
   int64_t get_first_data_pool() const { return *data_pools.begin(); }
   int64_t get_metadata_pool() const { return metadata_pool; }
@@ -634,6 +658,8 @@ protected:
 
   mds_rank_t tableserver = 0;   // which MDS has snaptable
   mds_rank_t root = 0;          // which MDS has root directory
+  std::unordered_set<mds_gid_t> qdb_cluster_members;
+  mds_gid_t qdb_cluster_leader = MDS_GID_NONE;
 
   __u32 session_timeout = 60;
   __u32 session_autoclose = 300;
index 5d92c0bd818a38e0363bf522dc980d12adfe45c7..bcc8e149ed7fa6708a190e3be49330405f4e159e 100644 (file)
@@ -101,6 +101,7 @@ void MDSRank::command_quiesce_db(const cmdmap_t& cmdmap, std::function<void(int,
   struct Ctx : public QuiesceDbManager::RequestContext {
     std::function<void(int, const std::string&, bufferlist&)> on_finish;
     bool all = false;
+    mds_gid_t me;
 
     double sec(QuiesceTimeInterval duration) {
       return duration_cast<dd>(duration).count();
@@ -126,6 +127,7 @@ void MDSRank::command_quiesce_db(const cmdmap_t& cmdmap, std::function<void(int,
 
       f->open_object_section("response"); {
         f->dump_int("epoch", response.db_version.epoch);
+        f->dump_int("leader", me);
         f->dump_int("set_version", response.db_version.set_version);
         f->open_object_section("sets"); {
           for (auto&& [set_id, set] : response.sets) {
@@ -168,8 +170,10 @@ void MDSRank::command_quiesce_db(const cmdmap_t& cmdmap, std::function<void(int,
 
   auto* ctx = new Ctx();
 
+  QuiesceInterface::PeerId me = mds_gid_t(monc->get_global_id());
   ctx->on_finish = std::move(on_finish);
   ctx->all = all;
+  ctx->me = me;
 
   ctx->request.reset([&](auto& r) {
     r.set_id = set_id;
@@ -212,6 +216,12 @@ void MDSRank::command_quiesce_db(const cmdmap_t& cmdmap, std::function<void(int,
   int rc = quiesce_db_manager->submit_request(ctx);
   if (rc != 0) {
     bufferlist bl;
+    auto f = Formatter::create_unique("json-pretty");
+    f->open_object_section("response");
+    f->dump_int("epoch", mdsmap->get_epoch());
+    f->dump_int("leader", mdsmap->get_quiesce_db_cluster_leader());
+    f->close_section();
+    f->flush(bl);
     // on_finish was moved into the ctx, so it must only be called via the ctx.
     ctx->on_finish(rc, "Error submitting the command to the local db manager", bl);
     delete ctx;
@@ -234,62 +244,35 @@ static void rebind_agent_callback(std::shared_ptr<QuiesceAgent> agt, std::shared
 
 void MDSRank::quiesce_cluster_update() {
   // the quiesce leader is the lowest rank with the highest state up to ACTIVE
-  auto less_leader = [](MDSMap::mds_info_t const* l, MDSMap::mds_info_t const* r) {
-    ceph_assert(l->rank != MDS_RANK_NONE);
-    ceph_assert(r->rank != MDS_RANK_NONE);
-    ceph_assert(l->state <= MDSMap::STATE_ACTIVE);
-    ceph_assert(r->state <= MDSMap::STATE_ACTIVE);
-    if (l->rank == r->rank) {
-      return l->state < r->state;
-    } else {
-      return l->rank > r->rank;
-    }
-  };
-
-  std::priority_queue<MDSMap::mds_info_t const*, std::vector<MDSMap::mds_info_t const*>, decltype(less_leader)> member_info(less_leader);
   QuiesceClusterMembership membership;
-
   QuiesceInterface::PeerId me = mds_gid_t(monc->get_global_id());
 
-  for (auto&& [gid, info] : mdsmap->get_mds_info()) {
-    // if it has a rank and state <= ACTIVE, it's good enough
-    // if (info.rank != MDS_RANK_NONE && info.state <= MDSMap::STATE_ACTIVE) {
-    if (info.rank != MDS_RANK_NONE && info.state == MDSMap::STATE_ACTIVE) {
-      member_info.push(&info);
-      membership.members.insert(info.global_id);
-    }
-  }
-
-  QuiesceInterface::PeerId leader = 
-    member_info.empty() 
-    ? QuiesceClusterMembership::INVALID_MEMBER 
-    : member_info.top()->global_id;
+  mdsmap->get_quiesce_db_cluster(membership.leader, membership.members);
 
   membership.epoch = mdsmap->get_epoch();
-  membership.leader = leader;
   membership.me = me;
   membership.fs_name = mdsmap->get_fs_name();
 
-  dout(5) << "epoch:" << membership.epoch << " me:" << me << " leader:" << leader << " members:" << membership.members 
+  dout(5) << "epoch:" << membership.epoch << " me:" << me << " leader:" << membership.leader << " members:" << membership.members 
     << (mdsmap->is_degraded() ? " (degraded)" : "") << dendl;
 
-  if (leader != QuiesceClusterMembership::INVALID_MEMBER) {
+  if (membership.leader != QuiesceClusterMembership::INVALID_MEMBER) {
     membership.send_ack = [=, this](QuiesceMap&& ack) {
-      if (me == leader) {
+      if (me == membership.leader) {
         // loopback
         quiesce_db_manager->submit_ack_from(me, std::move(ack));
         return 0;
       } else {
         std::lock_guard guard(mds_lock);
 
-        if (mdsmap->get_state_gid(leader) == MDSMap::STATE_NULL) {
-          dout(5) << "couldn't find the leader " << leader << " in the map" << dendl;
+        if (mdsmap->get_state_gid(membership.leader) == MDSMap::STATE_NULL) {
+          dout(5) << "couldn't find the leader " << membership.leader << " in the map" << dendl;
           return -ENOENT;
         }
-        auto addrs = mdsmap->get_info_gid(leader).addrs;
+        auto addrs = mdsmap->get_info_gid(membership.leader).addrs;
 
         auto ack_msg = make_message<MMDSQuiesceDbAck>();
-        dout(10) << "sending ack " << ack << " to the leader " << leader << dendl;
+        dout(10) << "sending ack " << ack << " to the leader " << membership.leader << dendl;
         ack_msg->encode_payload_from(me, ack);
         return send_message_mds(ack_msg, addrs);
       }
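
A consequence of the rc != 0 branch above: when submit_request() refuses a
command (notably with ENOTTY on a non-leader, see QuiesceDbManager.h below),
the mds now attaches a small JSON body naming the leader it knows about, which
is what allows the mgr to re-target the command. A hypothetical payload:

    # Hypothetical body attached to a refused "quiesce db" command; the keys
    # match the dump_int() calls above, the values are invented.
    enotty_response = {
        'epoch': 172,     # mdsmap epoch as seen by the refusing mds
        'leader': 24805,  # quiesce db leader gid from that mdsmap
    }
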
index a20b0cfc70ac2396e00c326a906f19fba13e268f..7b7e77ff2f676c6ee2d0566b8238cb015073600c 100644 (file)
 #include <set>
 #include <queue>
 
-template <>
-struct std::hash<mds_gid_t> {
-  size_t operator()(const mds_gid_t& gid) const
-  {
-    return hash<uint64_t> {}(gid);
-  }
-};
-
 struct QuiesceClusterMembership {
   static const QuiesceInterface::PeerId INVALID_MEMBER;
 
@@ -36,7 +28,7 @@ struct QuiesceClusterMembership {
 
   QuiesceInterface::PeerId me = INVALID_MEMBER;
   QuiesceInterface::PeerId leader = INVALID_MEMBER;
-  std::set<QuiesceInterface::PeerId> members;
+  std::unordered_set<QuiesceInterface::PeerId> members;
 
   // A courier interface to decouple from the messaging layer
   // Failures can be ignored, manager will call this repeatedly if needed
@@ -69,7 +61,7 @@ class QuiesceDbManager {
 
     // ============================
     // quiesce db leader interface: 
-    //    -> EPERM unless this is the leader
+    //    -> ENOTTY unless this is the leader
     
     // client interface to the DB
     int submit_request(RequestContext* ctx) {
index aeefe5c9592d6f8e1d4b9fac58e637c32f6c56e0..c319715a6a85c3dba587e8ac7e3f5e66fc105567 100644 (file)
@@ -14,6 +14,8 @@
 
 #include <regex>
 #include <sstream>
+#include <queue>
+#include <ranges>
 #include <boost/utility.hpp>
 
 #include "MDSMonitor.h"
@@ -174,11 +176,61 @@ void MDSMonitor::create_pending()
   dout(10) << "create_pending e" << fsmap.get_epoch() << dendl;
 }
 
+void MDSMonitor::assign_quiesce_db_leader(FSMap &fsmap) {
+
+  // the quiesce leader is the lowest rank with the highest state up to ACTIVE
+  auto less_leader = [](MDSMap::mds_info_t const* l, MDSMap::mds_info_t const* r) {
+    ceph_assert(l->rank != MDS_RANK_NONE);
+    ceph_assert(r->rank != MDS_RANK_NONE);
+    ceph_assert(l->state <= MDSMap::STATE_ACTIVE);
+    ceph_assert(r->state <= MDSMap::STATE_ACTIVE);
+    if (l->rank == r->rank) {
+      return l->state < r->state;
+    } else {
+      return l->rank > r->rank;
+    }
+  };
+
+  for (const auto& [fscid, fs] : std::as_const(fsmap)) {
+    auto &&mdsmap = fs.get_mds_map();
+
+    if (mdsmap.get_epoch() < fsmap.get_epoch()) {
+      // no changes in this fs, we can skip the calculation below
+      // NB! be careful with this clause when updating the leader selection logic.
+      // If input from outside of this fsmap ever affects the decision,
+      // this clause will have to be updated, too.
+      continue;
+    }
+
+    std::priority_queue<MDSMap::mds_info_t const*, std::vector<MDSMap::mds_info_t const*>, decltype(less_leader)> 
+      member_info(less_leader);
+    
+    std::unordered_set<mds_gid_t> members;
+
+    for (auto&& [gid, info] : mdsmap.get_mds_info()) {
+      // if it has a rank and state <= ACTIVE, it's good enough
+      // if (info.rank != MDS_RANK_NONE && info.state <= MDSMap::STATE_ACTIVE) {
+      if (info.rank != MDS_RANK_NONE && info.state == MDSMap::STATE_ACTIVE) {
+        member_info.push(&info);
+        members.insert(info.global_id);
+      }
+    }
+
+    auto leader = member_info.empty() ? MDS_GID_NONE : member_info.top()->global_id;
+
+    fsmap.modify_filesystem(fscid, [&leader, &members](auto &writable_fs) -> bool {
+      return writable_fs.get_mds_map().update_quiesce_db_cluster(leader, std::move(members));
+    });
+  }
+}
+
 void MDSMonitor::encode_pending(MonitorDBStore::TransactionRef t)
 {
   auto &pending = get_pending_fsmap_writeable();
   auto epoch = pending.get_epoch();
 
+  assign_quiesce_db_leader(pending);
+
   dout(10) << "encode_pending e" << epoch << dendl;
 
   // print map iff 'debug mon = 30' or higher
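
For illustration, the selection rule implemented by assign_quiesce_db_leader()
modeled in Python, with input shaped like the "info" section of an mdsmap dump
(the authoritative logic is the C++ above): among daemons that hold a rank and
are up:active, the lowest rank wins, and all of them become members.

    def pick_quiesce_cluster(mds_info):
        # Candidates must hold a rank and be up:active, matching the filter
        # in assign_quiesce_db_leader().
        members = [info for info in mds_info.values()
                   if info['rank'] != -1 and info['state'] == 'up:active']
        if not members:
            return None, set()  # MDS_GID_NONE and an empty member set
        # Only one daemon per rank can be active, so min-by-rank suffices;
        # the C++ comparator's state tie-break only matters for equal ranks.
        leader = min(members, key=lambda info: info['rank'])['gid']
        return leader, {info['gid'] for info in members}
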
index 36d53fe4e48322baf866ba27e235758cf3082621..25f27535c77a6eec5c1aeed48f13d7b1af17bc86 100644 (file)
@@ -129,6 +129,8 @@ class MDSMonitor : public PaxosService, public PaxosFSMap, protected CommandHand
   int load_metadata(std::map<mds_gid_t, Metadata>& m);
   void count_metadata(const std::string& field, ceph::Formatter *f);
 
+  void assign_quiesce_db_leader(FSMap &fsmap);
+
 public:
   void print_fs_summary(std::ostream& out) {
     get_fsmap().print_fs_summary(out);
index 2e25f994b6b7fabd699a0b96279417319a7b5a61..41e54fd8790ea4deb6f816a8d8c515edcf035b23 100644 (file)
@@ -1731,42 +1731,9 @@ class MgrModule(ceph_module.BaseMgrModule, MgrModuleLoggingMixin):
         ))
 
         return r
-    
-    MDS_STATE_ORD = {
-        "down:dne":              0, # CEPH_MDS_STATE_DNE,
-        "down:stopped":         -1, # CEPH_MDS_STATE_STOPPED,
-        "down:damaged":         15, # CEPH_MDS_STATE_DAMAGED,
-        "up:boot":              -4, # CEPH_MDS_STATE_BOOT,
-        "up:standby":           -5, # CEPH_MDS_STATE_STANDBY,
-        "up:standby-replay":    -8, # CEPH_MDS_STATE_STANDBY_REPLAY,
-        "up:oneshot-replay":    -9, # CEPH_MDS_STATE_REPLAYONCE,
-        "up:creating":          -6, # CEPH_MDS_STATE_CREATING,
-        "up:starting":          -7, # CEPH_MDS_STATE_STARTING,
-        "up:replay":             8, # CEPH_MDS_STATE_REPLAY,
-        "up:resolve":            9, # CEPH_MDS_STATE_RESOLVE,
-        "up:reconnect":         10, # CEPH_MDS_STATE_RECONNECT,
-        "up:rejoin":            11, # CEPH_MDS_STATE_REJOIN,
-        "up:clientreplay":      12, # CEPH_MDS_STATE_CLIENTREPLAY,
-        "up:active":            13, # CEPH_MDS_STATE_ACTIVE,
-        "up:stopping":          14, # CEPH_MDS_STATE_STOPPING,
-    }
-    MDS_STATE_ACTIVE_ORD = MDS_STATE_ORD["up:active"]
-
-    def get_quiesce_leader_info(self, fscid: str) -> Optional[dict]:
-        """
-        Helper for `tell_quiesce_leader` to chose the mds to send the command to.
-
-        Quiesce DB is managed by a leader which is selected based on the current MDSMap
-        The logic is currently implemented both here and on the MDS side,
-        see MDSRank::quiesce_cluster_update().
-
-        Ideally, this logic should be part of the MDSMonitor and the result should
-        be exposed via a dedicated field in the map, but until that is implemented
-        this function will have to be kept in sync with the corresponding logic
-        on the MDS side
-        """
-        leader_info: Optional[dict] = None
 
+    def get_quiesce_leader_gid(self, fscid: str) -> Optional[int]:
+        leader_gid : Optional[int] = None
         for fs in self.get("fs_map")['filesystems']:
             if fscid != fs["id"]:
                 continue
@@ -1774,33 +1741,30 @@ class MgrModule(ceph_module.BaseMgrModule, MgrModuleLoggingMixin):
             # quiesce leader is the lowest rank
             # with the highest state
             mdsmap = fs["mdsmap"]
-            for info in mdsmap['info'].values():
-                if info['rank'] == -1:
-                    continue
-                if leader_info is None:
-                    leader_info = info
-                else:
-                    if info['rank'] < leader_info['rank']:
-                        leader_info = info
-                    elif info['rank'] == leader_info['rank']:
-                        state_ord = self.MDS_STATE_ORD.get(info['state'])
-                        # if there are more than one daemons with the same rank
-                        # only one of them can be active
-                        if state_ord == self.MDS_STATE_ACTIVE_ORD:
-                            leader_info = info
+            leader_gid = mdsmap.get("qdb_leader", None)
             break
 
-        return leader_info
-
-    def tell_quiesce_leader(self, fscid: str, cmd_dict: dict) -> Tuple[int, str, str]:
-        qleader = self.get_quiesce_leader_info(fscid)
-        if qleader is None:
-            self.log.warn("Couldn't resolve the quiesce leader for fscid %s" % fscid)
-            return (-errno.ENOENT, "", "Couldn't resolve the quiesce leader for fscid %s" % fscid)
-        self.log.debug("resolved quiesce leader for fscid {fscid} at daemon '{name}' gid {gid} rank {rank} ({state})".format(fscid=fscid, **qleader))
-        # We use the one_shot here to cover for cases when the mds crashes
-        # without this parameter the client may get stuck awaiting response from a dead MDS
-        return self.tell_command('mds', str(qleader['gid']), cmd_dict, one_shot=True)
+        return leader_gid
+
+    def tell_quiesce_leader(self, leader: int, cmd_dict: dict) -> Tuple[int, str, str]:
+        max_retries = 5
+        for _ in range(max_retries):
+            # We use "one_shot" here to cover for cases when the mds crashes
+            # without this parameter the client may get stuck awaiting response from a dead MDS
+            # (which is particularly bad for the volumes plugin finisher thread)
+            rc, stdout, stderr = self.tell_command('mds', str(leader), cmd_dict, one_shot=True)
+            if rc == -errno.ENOTTY:
+                try:
+                    resp = json.loads(stdout)
+                    leader = int(resp['leader'])
+                    self.log.info("Retrying a quiesce db command with leader %d" % leader)
+                except Exception as e:
+                    self.log.error("Couldn't parse ENOTTY response from an mds with error: %s\n%s" % (str(e), stdout))
+                    break
+            else:
+                break
+
+        return (rc, stdout, stderr)
 
     def send_command(
             self,
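
A hypothetical end-to-end caller, mirroring what the volumes plugin does
below: resolve the leader gid once from the cached fsmap, then let
tell_quiesce_leader() follow up to five ENOTTY redirects if leadership has
moved since:

    import errno

    def quiesce_via_leader(mgr, fscid, roots):
        # Resolve the current quiesce db leader from the fsmap...
        leader = mgr.get_quiesce_leader_gid(fscid)
        if leader is None:
            return -errno.ENOENT, '', 'no quiesce db leader for %s' % fscid
        # ...then send; ENOTTY responses carry the new leader gid and are
        # retried internally by tell_quiesce_leader().
        return mgr.tell_quiesce_leader(leader,
                                       {'prefix': 'quiesce db', 'roots': roots})
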
index ef3b171af76285255b1ebf7aae917dd502255ade..8fb7defed711ae05911558876f675feafd47da97 100644 (file)
@@ -456,19 +456,20 @@ class VolumeClient(CephfsClient["Module"]):
         volname    = cmd['vol_name']
         default_group_name  = cmd.get('group_name', None)
         roots = []
-        fscid = None
+        leader_gid = cmd.get('with_leader', None)
 
         with open_volume(self, volname) as fs_handle:
-            fscid = fs_handle.get_fscid()
+            if leader_gid is None:
+                fscid = fs_handle.get_fscid()
+                leader_gid = self.mgr.get_quiesce_leader_gid(fscid)
+                if leader_gid is None:
+                    return -errno.ENOENT, "", "Couldn't resolve the quiesce leader for volume %s (%s)" % (volname, fscid)
 
             if cmd.get('leader', False):
-                leader_info = self.mgr.get_quiesce_leader_info(fscid)
-                if leader_info is None:
-                    return -errno.ENOENT, "", "Couldn't resolve the quiesce leader for volume %s (%s)" % (volname, fscid)
                 return (
                     0,
-                    "mds.%d" % leader_info['gid'],
-                    "Resolved the quiesce leader for volume '{volname}' as daemon '{name}' ({gid}) {state} rank {rank}".format(volname=volname, **leader_info)
+                    "mds.%d" % leader_gid,
+                    "Resolved the quiesce leader for volume '{volname}' as gid {gid}".format(volname=volname, gid=leader_gid)
                 )
 
 
@@ -493,7 +494,7 @@ class VolumeClient(CephfsClient["Module"]):
         cmd['roots'] = roots
         cmd['prefix'] = 'quiesce db'
 
-        return self.mgr.tell_quiesce_leader(fscid, cmd)
+        return self.mgr.tell_quiesce_leader(leader_gid, cmd)
 
     def set_user_metadata(self, **kwargs):
         ret        = 0, "", ""
index ee36005d406dc9b74f3841c3c980ea1906af3a60..e059648261e9eb47dc08291482ba5cfc43bb369e 100644 (file)
@@ -291,7 +291,8 @@ class Module(orchestrator.OrchestratorClientMixin, MgrModule):
                    'name=all,type=CephBool,req=false '
                    'name=cancel,type=CephBool,req=false '
                    'name=group_name,type=CephString,req=false '
-                   'name=leader,type=CephBool,req=false ',
+                   'name=leader,type=CephBool,req=false '
+                   'name=with_leader,type=CephInt,range=0,req=false ',
             'desc': "Manage quiesce sets of subvolumes",
             'perm': 'rw'
         },
index e820db4044836a74df587a0edd6543ba729d2732..3c48474fa32b516c92b6836786cedebe15383093 100644 (file)
@@ -136,7 +136,7 @@ class QuiesceDbTest: public testing::Test {
     {
       ++epoch;
       ASSERT_GE(leader_and_replicas.size(), 1);
-      std::set<QuiesceInterface::PeerId> members(leader_and_replicas.begin(), leader_and_replicas.end());
+      std::unordered_set<QuiesceInterface::PeerId> members(leader_and_replicas.begin(), leader_and_replicas.end());
       auto leader = leader_and_replicas[0];
       for (const auto &[this_peer, mgr] : managers) {
         QuiesceClusterMembership mem = {