From: Patrick Donnelly
Date: Wed, 28 Jul 2021 15:56:11 +0000 (-0700)
Subject: qa: avoid stopping/restarting mds in journal repair tests
X-Git-Tag: v17.1.0~940^2~1
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=14324ab5c28d7f86982a9040e6d3b6bae26fc02c;p=ceph.git

qa: avoid stopping/restarting mds in journal repair tests

It is enough to just fail ranks and manipulate the "joinable" flag of
the fs.

Signed-off-by: Patrick Donnelly
---

diff --git a/qa/tasks/cephfs/test_journal_repair.py b/qa/tasks/cephfs/test_journal_repair.py
index b810e1a28eff8..06d40bf108254 100644
--- a/qa/tasks/cephfs/test_journal_repair.py
+++ b/qa/tasks/cephfs/test_journal_repair.py
@@ -67,8 +67,7 @@ class TestJournalRepair(CephFSTestCase):
         self.mount_a.umount_wait()
 
         # Stop the MDS
-        self.fs.mds_stop()
-        self.fs.mds_fail()
+        self.fs.fail()
 
         # Now, the journal should contain the operations, but the backing
         # store shouldn't
@@ -88,7 +87,7 @@ class TestJournalRepair(CephFSTestCase):
         # Now check the MDS can read what we wrote: truncate the journal
         # and start the mds.
         self.fs.journal_tool(['journal', 'reset'], 0)
-        self.fs.mds_fail_restart()
+        self.fs.set_joinable()
         self.fs.wait_for_daemons()
 
         # List files
@@ -160,12 +159,7 @@ class TestJournalRepair(CephFSTestCase):
         # Set max_mds to 2
         self.fs.set_max_mds(2)
         status = self.fs.wait_for_daemons()
-        active_mds_names = self.fs.get_active_names(status=status)
-
-        # Switch off any unneeded MDS daemons
-        for unneeded_mds in set(self.mds_cluster.mds_ids) - set(active_mds_names):
-            self.mds_cluster.mds_stop(unneeded_mds)
-            self.mds_cluster.mds_fail(unneeded_mds)
+        self.fs.set_joinable(False) # no unintended failover
 
         # Create a dir on each rank
         self.mount_a.run_shell_payload("mkdir {alpha,bravo} && touch {alpha,bravo}/file")
@@ -183,11 +177,11 @@ class TestJournalRepair(CephFSTestCase):
 
         # Flush the journals so that we have some backing store data
         # belonging to one MDS, and some to the other MDS.
-        for mds_name in active_mds_names:
-            self.fs.mds_asok(["flush", "journal"], mds_name)
+        self.fs.rank_asok(["flush", "journal"], rank=0)
+        self.fs.rank_asok(["flush", "journal"], rank=1)
 
         # Stop (hard) the second MDS daemon
-        self.fs.mds_stop(active_mds_names[1])
+        self.fs.rank_fail(rank=1)
 
         # Wipe out the tables for MDS rank 1 so that it is broken and can't start
         # (this is the simulated failure that we will demonstrate that the disaster
@@ -218,8 +212,7 @@ class TestJournalRepair(CephFSTestCase):
 
         # See that the second MDS will crash when it starts and tries to
         # acquire rank 1
-        damaged_id = active_mds_names[1]
-        self.fs.mds_restart(damaged_id)
+        self.fs.set_joinable(True)
 
         # The daemon taking the damaged rank should start starting, then
         # restart back into standby after asking the mon to mark the rank
@@ -230,21 +223,8 @@ class TestJournalRepair(CephFSTestCase):
 
         self.wait_until_true(is_marked_damaged, 60)
 
-        def get_state():
-            info = self.mds_cluster.get_mds_info(damaged_id)
-            return info['state'] if info is not None else None
-
-        self.wait_until_equal(
-                get_state,
-                "up:standby",
-                timeout=60)
-
-        self.fs.mds_stop(damaged_id)
-        self.fs.mds_fail(damaged_id)
-
         # Now give up and go through a disaster recovery procedure
-        self.fs.mds_stop(active_mds_names[0])
-        self.fs.mds_fail(active_mds_names[0])
+        self.fs.fail()
         # Invoke recover_dentries quietly, because otherwise log spews millions of lines
         self.fs.journal_tool(["event", "recover_dentries", "summary"], 0, quiet=True)
         self.fs.journal_tool(["event", "recover_dentries", "summary"], 1, quiet=True)
@@ -256,19 +236,15 @@ class TestJournalRepair(CephFSTestCase):
 
         # Bring an MDS back online, mount a client, and see that we can walk the full
        # filesystem tree again
-        self.fs.mds_fail_restart(active_mds_names[0])
+        self.fs.set_joinable(True) # redundant with `fs reset`
         self.wait_until_equal(lambda: self.fs.get_active_names(), [active_mds_names[0]], 30,
                               reject_fn=lambda v: len(v) > 1)
         self.mount_a.mount_wait()
         self.mount_a.run_shell(["ls", "-R"], wait=True)
 
     def test_table_tool(self):
-        active_mdss = self.fs.get_active_names()
-        self.assertEqual(len(active_mdss), 1)
-        mds_name = active_mdss[0]
-
         self.mount_a.run_shell(["touch", "foo"])
-        self.fs.mds_asok(["flush", "journal"], mds_name)
+        self.fs.rank_asok(["flush", "journal"])
 
         log.info(self.fs.table_tool(["all", "show", "inode"]))
         log.info(self.fs.table_tool(["all", "show", "snap"]))
@@ -398,8 +374,7 @@ class TestJournalRepair(CephFSTestCase):
         for mount in self.mounts:
             mount.umount_wait()
 
-        self.fs.mds_stop()
-        self.fs.mds_fail()
+        self.fs.fail()
 
         # journal tool smoke
         workunit(self.ctx, {
@@ -412,7 +387,7 @@ class TestJournalRepair(CephFSTestCase):
-        self.fs.mds_restart()
+        self.fs.set_joinable()
         self.fs.wait_for_daemons()
         self.mount_a.mount_wait()
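
Note (not part of the patch): the change replaces per-daemon stop/fail/restart calls
with file-system-level operations. The following is a minimal illustrative sketch of
the resulting flow, assembled only from calls that appear in this diff (fail,
journal_tool, set_joinable, wait_for_daemons); the helper name repair_and_rejoin is
hypothetical and does not exist in the test suite.

    def repair_and_rejoin(fs, rank=0):
        # Take the whole fs offline: clears the joinable flag and fails every
        # rank, so no standby claims the rank while its journal is operated on.
        fs.fail()

        # Offline journal surgery on the chosen rank (quiet to avoid log spew).
        fs.journal_tool(["event", "recover_dentries", "summary"], rank, quiet=True)
        fs.journal_tool(["journal", "reset"], rank)

        # Allow standbys to claim ranks again and wait for them to come up.
        fs.set_joinable()
        fs.wait_for_daemons()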