git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mds: test connections to bootstrapping MDS
author Patrick Donnelly <pdonnell@redhat.com>
Mon, 8 Nov 2021 19:55:05 +0000 (14:55 -0500)
committer Patrick Donnelly <pdonnell@redhat.com>
Mon, 13 Dec 2021 15:49:52 +0000 (10:49 -0500)
Signed-off-by: Patrick Donnelly <pdonnell@redhat.com>
(cherry picked from commit 402919cbe65c9fa219a9ec7a0f6534c6e1479f03)

Conflicts:
src/common/options/mds.yaml.in: legacy option conversion

qa/suites/fs/thrash/workloads/overrides/races.yaml [new file with mode: 0644]
qa/tasks/cephfs/test_failover.py
src/common/options.cc
src/mds/MDSRank.cc

diff --git a/qa/suites/fs/thrash/workloads/overrides/races.yaml b/qa/suites/fs/thrash/workloads/overrides/races.yaml
new file mode 100644 (file)
index 0000000..e7d7538
--- /dev/null
@@ -0,0 +1,5 @@
+overrides:
+  ceph:
+    conf:
+      mds:
+        mds_sleep_rank_change: 5000000.0
diff --git a/qa/tasks/cephfs/test_failover.py b/qa/tasks/cephfs/test_failover.py
index f2032b23741086e331a7ad1e0be3f99e40fce9d4..db33b10a86c2d2003542f5ad4d26a543ff3df741 100644 (file)
@@ -459,6 +459,16 @@ class TestFailover(CephFSTestCase):
         self.assertEqual(mds_0['gid'], self.fs.get_rank(rank=0)['gid'])
         self.fs.rank_freeze(False, rank=0)
 
+    def test_connect_bootstrapping(self):
+        self.config_set("mds", "mds_sleep_rank_change", 10000000.0)
+        self.config_set("mds", "mds_connect_bootstrapping", True)
+        self.fs.set_max_mds(2)
+        self.fs.wait_for_daemons()
+        self.fs.rank_fail(rank=0)
+        # rank 0 will get stuck in up:resolve, see https://tracker.ceph.com/issues/53194
+        self.fs.wait_for_daemons()
+
+
 class TestStandbyReplay(CephFSTestCase):
     CLIENTS_REQUIRED = 0
     MDSS_REQUIRED = 4
diff --git a/src/common/options.cc b/src/common/options.cc
index 6a2115e15a0ab5a689e8e7af5c48782e8c4d3b13..a5d22fb638506e7afdd2de6e0ffc5c515ca9f600 100644 (file)
@@ -8874,6 +8874,16 @@ std::vector<Option> get_mds_options() {
      .set_description("interval in seconds for sending ping messages to active MDSs.")
      .set_long_description("interval in seconds for rank 0 to send ping messages to all active MDSs."),
 
+    Option("mds_sleep_rank_change", Option::TYPE_FLOAT, Option::LEVEL_DEV)
+     .set_default(0.0)
+     .set_flag(Option::FLAG_RUNTIME)
+     .set_description(""),
+
+    Option("mds_connect_bootstrapping", Option::TYPE_BOOL, Option::LEVEL_DEV)
+     .set_default(false)
+     .set_flag(Option::FLAG_RUNTIME)
+     .set_description(""),
+
     Option("mds_metrics_update_interval", Option::TYPE_SECS, Option::LEVEL_ADVANCED)
      .set_default(2)
      .set_flag(Option::FLAG_RUNTIME)
diff --git a/src/mds/MDSRank.cc b/src/mds/MDSRank.cc
index 3d5ed6e2c59b18e94d1a11209895b28aac671d1a..e2eab04c9d6fc23a135694ead67227484a3551bd 100644 (file)
@@ -2243,9 +2243,16 @@ void MDSRankDispatcher::handle_mds_map(
 
   if (oldstate != state) {
     // update messenger.
-    if (state == MDSMap::STATE_STANDBY_REPLAY) {
+    auto sleep_rank_change = g_conf().get_val<double>("mds_sleep_rank_change");
+    if (unlikely(sleep_rank_change > 0)) {
+      // This is to trigger a race where another rank tries to connect to this
+      // MDS before an update to the messenger "myname" is processed. This race
+      // should be closed by ranks holding messages until the rank is out of a
+      // "bootstrapping" state.
+      usleep(sleep_rank_change);
+    } if (state == MDSMap::STATE_STANDBY_REPLAY) {
       dout(1) << "handle_mds_map i am now mds." << mds_gid << "." << incarnation
-             << " replaying mds." << whoami << "." << incarnation << dendl;
+          << " replaying mds." << whoami << "." << incarnation << dendl;
       messenger->set_myname(entity_name_t::MDS(mds_gid));
     } else {
       dout(1) << "handle_mds_map i am now mds." << whoami << "." << incarnation << dendl;
@@ -2433,6 +2440,18 @@ void MDSRankDispatcher::handle_mds_map(
     }
   }
 
+  // for testing...
+  if (unlikely(g_conf().get_val<bool>("mds_connect_bootstrapping"))) {
+    std::set<mds_rank_t> bootstrapping;
+    mdsmap->get_mds_set(bootstrapping, MDSMap::STATE_REPLAY);
+    mdsmap->get_mds_set(bootstrapping, MDSMap::STATE_CREATING);
+    mdsmap->get_mds_set(bootstrapping, MDSMap::STATE_STARTING);
+    for (const auto& rank : bootstrapping) {
+      auto m = make_message<MMDSMap>(monc->get_fsid(), *mdsmap);
+      send_message_mds(std::move(m), rank);
+    }
+  }
+
   // did someone go active?
   if (state >= MDSMap::STATE_CLIENTREPLAY &&
       oldstate >= MDSMap::STATE_CLIENTREPLAY) {