From e1340c05ad5f1cc49e9a551b53abe770d07b08c3 Mon Sep 17 00:00:00 2001
From: Jos Collin
Date: Wed, 28 Feb 2024 11:45:25 +0530
Subject: [PATCH] qa: fix cephfs-journal-tool command options and make fs inactive

Fixes: https://tracker.ceph.com/issues/62925
Signed-off-by: Jos Collin
(cherry picked from commit 0820b31d5b1f1542636c56611ec27636afc23b68)
---
 qa/tasks/cephfs/test_damage.py                   | 2 +-
 qa/tasks/cephfs/test_data_scan.py                | 4 ++--
 qa/tasks/cephfs/test_flush.py                    | 8 +++++++-
 qa/tasks/cephfs/test_forward_scrub.py            | 4 ++--
 qa/tasks/cephfs/test_journal_migration.py        | 3 +++
 qa/tasks/cephfs/test_journal_repair.py           | 4 ++--
 qa/tasks/cephfs/test_recovery_pool.py            | 4 ++--
 qa/workunits/fs/damage/test-first-damage.sh      | 2 +-
 qa/workunits/suites/cephfs_journal_tool_smoke.sh | 4 ++--
 9 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/qa/tasks/cephfs/test_damage.py b/qa/tasks/cephfs/test_damage.py
index ebf6dc03b6490..f963309461b45 100644
--- a/qa/tasks/cephfs/test_damage.py
+++ b/qa/tasks/cephfs/test_damage.py
@@ -498,7 +498,7 @@ class TestDamage(CephFSTestCase):
         # Drop everything from the MDS cache
         self.fs.fail()
 
-        self.fs.journal_tool(['journal', 'reset'], 0)
+        self.fs.journal_tool(['journal', 'reset', '--yes-i-really-really-mean-it'], 0)
 
         self.fs.set_joinable()
         self.fs.wait_for_daemons()
diff --git a/qa/tasks/cephfs/test_data_scan.py b/qa/tasks/cephfs/test_data_scan.py
index 6533ac98a2d68..7f5259aba9073 100644
--- a/qa/tasks/cephfs/test_data_scan.py
+++ b/qa/tasks/cephfs/test_data_scan.py
@@ -446,9 +446,9 @@ class TestDataScan(CephFSTestCase):
         if False:
             with self.assertRaises(CommandFailedError):
                 # Normal reset should fail when no objects are present, we'll use --force instead
-                self.fs.journal_tool(["journal", "reset"], 0)
+                self.fs.journal_tool(["journal", "reset", "--yes-i-really-really-mean-it"], 0)
 
-        self.fs.journal_tool(["journal", "reset", "--force"], 0)
+        self.fs.journal_tool(["journal", "reset", "--force", "--yes-i-really-really-mean-it"], 0)
         self.fs.data_scan(["init"])
         self.fs.data_scan(["scan_extents"], worker_count=workers)
         self.fs.data_scan(["scan_inodes"], worker_count=workers)
diff --git a/qa/tasks/cephfs/test_flush.py b/qa/tasks/cephfs/test_flush.py
index 17cb849700eb3..c4373fa12fc9c 100644
--- a/qa/tasks/cephfs/test_flush.py
+++ b/qa/tasks/cephfs/test_flush.py
@@ -3,7 +3,6 @@
 from textwrap import dedent
 from tasks.cephfs.cephfs_test_case import CephFSTestCase
 from tasks.cephfs.filesystem import ObjectNotFound, ROOT_INO
-
 class TestFlush(CephFSTestCase):
     def test_flush(self):
         self.mount_a.run_shell(["mkdir", "mydir"])
@@ -44,7 +43,10 @@ class TestFlush(CephFSTestCase):
 
         # ...and the journal is truncated to just a single subtreemap from the
         # newly created segment
+        self.fs.fail()
         summary_output = self.fs.journal_tool(["event", "get", "summary"], 0)
+        self.fs.set_joinable()
+        self.fs.wait_for_daemons()
         try:
             self.assertEqual(summary_output, dedent(
                 """
@@ -72,6 +74,8 @@ class TestFlush(CephFSTestCase):
             ).strip())
         flush_data = self.fs.mds_asok(["flush", "journal"])
         self.assertEqual(flush_data['return_code'], 0)
+
+        self.fs.fail()
         self.assertEqual(self.fs.journal_tool(["event", "get", "summary"], 0),
                          dedent(
                              """
@@ -80,6 +84,8 @@
                              Errors: 0
                              """
                          ).strip())
+        self.fs.set_joinable()
+        self.fs.wait_for_daemons()
 
         # Now for deletion!
         # We will count the RADOS deletions and MDS file purges, to verify that
diff --git a/qa/tasks/cephfs/test_forward_scrub.py b/qa/tasks/cephfs/test_forward_scrub.py
index 12a0fa6dafaf2..a18839f76ae7e 100644
--- a/qa/tasks/cephfs/test_forward_scrub.py
+++ b/qa/tasks/cephfs/test_forward_scrub.py
@@ -184,7 +184,7 @@ class TestForwardScrub(CephFSTestCase):
         # inotable versions (due to scan_links)
         self.fs.flush()
         self.fs.fail()
-        self.fs.journal_tool(["journal", "reset", "--force"], 0)
+        self.fs.journal_tool(["journal", "reset", "--force", "--yes-i-really-really-mean-it"], 0)
 
         # Run cephfs-data-scan targeting only orphans
         self.fs.data_scan(["scan_extents", self.fs.get_data_pool_name()])
@@ -411,7 +411,7 @@ class TestForwardScrub(CephFSTestCase):
 
         self.fs.radosm(["rm", "{0:x}.00000000".format(dir_ino)])
 
-        self.fs.journal_tool(['journal', 'reset'], 0)
+        self.fs.journal_tool(['journal', 'reset', '--yes-i-really-really-mean-it'], 0)
         self.fs.set_joinable()
         self.fs.wait_for_daemons()
         self.mount_a.mount_wait()
diff --git a/qa/tasks/cephfs/test_journal_migration.py b/qa/tasks/cephfs/test_journal_migration.py
index 67b514c22f1c3..1ae7aa528ff9e 100644
--- a/qa/tasks/cephfs/test_journal_migration.py
+++ b/qa/tasks/cephfs/test_journal_migration.py
@@ -67,6 +67,7 @@ class TestJournalMigration(CephFSTestCase):
         ))
 
         # Verify that cephfs-journal-tool can now read the rewritten journal
+        self.fs.fail()
         inspect_out = self.fs.journal_tool(["journal", "inspect"], 0)
         if not inspect_out.endswith(": OK"):
             raise RuntimeError("Unexpected journal-tool result: '{0}'".format(
@@ -84,6 +85,8 @@ class TestJournalMigration(CephFSTestCase):
         if event_count < 1000:
             # Approximate value of "lots", expected from having run fsstress
             raise RuntimeError("Unexpectedly few journal events: {0}".format(event_count))
+        self.fs.set_joinable()
+        self.fs.wait_for_daemons()
 
         # Do some client work to check that writing the log is still working
         with self.mount_a.mounted_wait():
diff --git a/qa/tasks/cephfs/test_journal_repair.py b/qa/tasks/cephfs/test_journal_repair.py
index 365140fd9f60a..0a4bdf17286b2 100644
--- a/qa/tasks/cephfs/test_journal_repair.py
+++ b/qa/tasks/cephfs/test_journal_repair.py
@@ -86,7 +86,7 @@ class TestJournalRepair(CephFSTestCase):
 
         # Now check the MDS can read what we wrote: truncate the journal
         # and start the mds.
-        self.fs.journal_tool(['journal', 'reset'], 0)
+        self.fs.journal_tool(['journal', 'reset', '--yes-i-really-really-mean-it'], 0)
         self.fs.set_joinable()
         self.fs.wait_for_daemons()
 
@@ -231,7 +231,7 @@ class TestJournalRepair(CephFSTestCase):
         self.fs.journal_tool(["event", "recover_dentries", "summary"], 0, quiet=True)
         self.fs.journal_tool(["event", "recover_dentries", "summary"], 1, quiet=True)
         self.fs.table_tool(["0", "reset", "session"])
-        self.fs.journal_tool(["journal", "reset"], 0)
+        self.fs.journal_tool(["journal", "reset", "--yes-i-really-really-mean-it"], 0)
         self.fs.erase_mds_objects(1)
         self.run_ceph_cmd('fs', 'reset', self.fs.name, '--yes-i-really-mean-it')
 
diff --git a/qa/tasks/cephfs/test_recovery_pool.py b/qa/tasks/cephfs/test_recovery_pool.py
index 7aef282298590..92eeefe8ada01 100644
--- a/qa/tasks/cephfs/test_recovery_pool.py
+++ b/qa/tasks/cephfs/test_recovery_pool.py
@@ -138,7 +138,7 @@ class TestRecoveryPool(CephFSTestCase):
         if False:
             with self.assertRaises(CommandFailedError):
                 # Normal reset should fail when no objects are present, we'll use --force instead
-                self.fs.journal_tool(["journal", "reset"], 0)
+                self.fs.journal_tool(["journal", "reset", "--yes-i-really-really-mean-it"], 0)
 
         recovery_fs.data_scan(['scan_extents', '--alternate-pool',
                                recovery_pool, '--filesystem', self.fs.name,
@@ -150,7 +150,7 @@ class TestRecoveryPool(CephFSTestCase):
         recovery_fs.data_scan(['scan_links', '--filesystem', recovery_fs.name])
         recovery_fs.journal_tool(['event', 'recover_dentries', 'list',
                                   '--alternate-pool', recovery_pool], 0)
-        recovery_fs.journal_tool(["journal", "reset", "--force"], 0)
+        recovery_fs.journal_tool(["journal", "reset", "--force", "--yes-i-really-really-mean-it"], 0)
 
         # Start the MDS
         recovery_fs.set_joinable()
diff --git a/qa/workunits/fs/damage/test-first-damage.sh b/qa/workunits/fs/damage/test-first-damage.sh
index 57447b957d789..5038ef3cd050f 100755
--- a/qa/workunits/fs/damage/test-first-damage.sh
+++ b/qa/workunits/fs/damage/test-first-damage.sh
@@ -84,7 +84,7 @@ function recover {
   ceph fs fail "$FS"
   sleep 5
   cephfs-journal-tool --rank="$FS":0 event recover_dentries summary
-  cephfs-journal-tool --rank="$FS":0 journal reset
+  cephfs-journal-tool --rank="$FS":0 journal reset --yes-i-really-really-mean-it
   "$PYTHON" $FIRST_DAMAGE --debug /tmp/debug1 --memo /tmp/memo1 "$METADATA_POOL"
   "$PYTHON" $FIRST_DAMAGE --debug /tmp/debug2 --memo /tmp/memo2 --repair-nosnap "$METADATA_POOL"
   "$PYTHON" $FIRST_DAMAGE --debug /tmp/debug3 --memo /tmp/memo3 --remove "$METADATA_POOL"
diff --git a/qa/workunits/suites/cephfs_journal_tool_smoke.sh b/qa/workunits/suites/cephfs_journal_tool_smoke.sh
index a24dac532d5a2..6a5379e1b4772 100755
--- a/qa/workunits/suites/cephfs_journal_tool_smoke.sh
+++ b/qa/workunits/suites/cephfs_journal_tool_smoke.sh
@@ -50,7 +50,7 @@ if [ ! -s $JOURNAL_FILE ] ; then
 fi
 
 # Can we execute a journal reset?
-$BIN journal reset
+$BIN journal reset --yes-i-really-really-mean-it
 $BIN journal inspect
 $BIN header get
 
@@ -86,6 +86,6 @@ $BIN event splice summary
 # Metadata objects have been modified by the 'event recover_dentries' command.
 # Journal is no long consistent with respect to metadata objects (especially inotable).
 # To ensure mds successfully replays its journal, we need to do journal reset.
-$BIN journal reset
+$BIN journal reset --yes-i-really-really-mean-it
 
 cephfs-table-tool all reset session
-- 
2.39.5
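
Note for trying the change by hand: the commands below are a minimal sketch of the
offline workflow this patch assumes, not part of the patch itself. The file system
name "cephfs" and rank 0 are placeholders for whatever exists on the test cluster.

  # Take the file system offline before running cephfs-journal-tool.
  ceph fs fail cephfs
  # Read-only inspection while the fs is down.
  cephfs-journal-tool --rank=cephfs:0 journal inspect
  # 'journal reset' now requires the extra confirmation flag added by this patch.
  cephfs-journal-tool --rank=cephfs:0 journal reset --yes-i-really-really-mean-it
  # Allow MDS daemons to join again once the journal work is done.
  ceph fs set cephfs joinable true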