git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mds: test connections to bootstrapping MDS
author: Patrick Donnelly <pdonnell@redhat.com>
Mon, 8 Nov 2021 19:55:05 +0000 (14:55 -0500)
committer: Patrick Donnelly <pdonnell@redhat.com>
Tue, 23 Nov 2021 14:23:09 +0000 (09:23 -0500)
Signed-off-by: Patrick Donnelly <pdonnell@redhat.com>
qa/suites/fs/thrash/workloads/overrides/races.yaml [new file with mode: 0644]
qa/tasks/cephfs/test_failover.py
src/common/options/mds.yaml.in
src/mds/MDSRank.cc

diff --git a/qa/suites/fs/thrash/workloads/overrides/races.yaml b/qa/suites/fs/thrash/workloads/overrides/races.yaml
new file mode 100644 (file)
index 0000000..e7d7538
--- /dev/null
@@ -0,0 +1,5 @@
+overrides:
+  ceph:
+    conf:
+      mds:
+        mds_sleep_rank_change: 5000000.0
diff --git a/qa/tasks/cephfs/test_failover.py b/qa/tasks/cephfs/test_failover.py
index 7558ede168483ec197bcfb34d25f9a072c608b9f..7147807bf521cc4e770d5eae1c5affdbae601391 100644 (file)
@@ -441,6 +441,16 @@ class TestFailover(CephFSTestCase):
         self.assertEqual(mds_0['gid'], self.fs.get_rank(rank=0)['gid'])
         self.fs.rank_freeze(False, rank=0)
 
+    def test_connect_bootstrapping(self):
+        self.config_set("mds", "mds_sleep_rank_change", 10000000.0)
+        self.config_set("mds", "mds_connect_bootstrapping", True)
+        self.fs.set_max_mds(2)
+        self.fs.wait_for_daemons()
+        self.fs.rank_fail(rank=0)
+        # rank 0 will get stuck in up:resolve, see https://tracker.ceph.com/issues/53194
+        self.fs.wait_for_daemons()
+
+
 class TestStandbyReplay(CephFSTestCase):
     CLIENTS_REQUIRED = 0
     MDSS_REQUIRED = 4
diff --git a/src/common/options/mds.yaml.in b/src/common/options/mds.yaml.in
index f418be951ab62683e14670c41264ec115d21a438..dde26e8abf4e0a0c804775451932164d26bce5fb 100644 (file)
@@ -1409,3 +1409,15 @@ options:
   - mds
   flags:
   - runtime
+- name: mds_sleep_rank_change
+  type: float
+  level: dev
+  default: 0.0
+  flags:
+  - runtime
+- name: mds_connect_bootstrapping
+  type: bool
+  level: dev
+  default: false
+  flags:
+  - runtime
diff --git a/src/mds/MDSRank.cc b/src/mds/MDSRank.cc
index 186b528010e790cb712ad78ef416dd9c5ee02ab9..5aa1b126917bf153160383332ce4bf6e308a7995 100644 (file)
@@ -2247,9 +2247,16 @@ void MDSRankDispatcher::handle_mds_map(
 
   if (oldstate != state) {
     // update messenger.
-    if (state == MDSMap::STATE_STANDBY_REPLAY) {
+    auto sleep_rank_change = g_conf().get_val<double>("mds_sleep_rank_change");
+    if (unlikely(sleep_rank_change > 0)) {
+      // This is to trigger a race where another rank tries to connect to this
+      // MDS before an update to the messenger "myname" is processed. This race
+      // should be closed by ranks holding messages until the rank is out of a
+      // "bootstrapping" state.
+      usleep(sleep_rank_change);
+    } if (state == MDSMap::STATE_STANDBY_REPLAY) {
       dout(1) << "handle_mds_map i am now mds." << mds_gid << "." << incarnation
-             << " replaying mds." << whoami << "." << incarnation << dendl;
+          << " replaying mds." << whoami << "." << incarnation << dendl;
       messenger->set_myname(entity_name_t::MDS(mds_gid));
     } else {
       dout(1) << "handle_mds_map i am now mds." << whoami << "." << incarnation << dendl;
@@ -2437,6 +2444,18 @@ void MDSRankDispatcher::handle_mds_map(
     }
   }
 
+  // for testing...
+  if (unlikely(g_conf().get_val<bool>("mds_connect_bootstrapping"))) {
+    std::set<mds_rank_t> bootstrapping;
+    mdsmap->get_mds_set(bootstrapping, MDSMap::STATE_REPLAY);
+    mdsmap->get_mds_set(bootstrapping, MDSMap::STATE_CREATING);
+    mdsmap->get_mds_set(bootstrapping, MDSMap::STATE_STARTING);
+    for (const auto& rank : bootstrapping) {
+      auto m = make_message<MMDSMap>(monc->get_fsid(), *mdsmap);
+      send_message_mds(std::move(m), rank);
+    }
+  }
+
   // did someone go active?
   if (state >= MDSMap::STATE_CLIENTREPLAY &&
       oldstate >= MDSMap::STATE_CLIENTREPLAY) {