git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
cephfs,mon: require confirmation to fail unhealthy FS
author: Rishabh Dave <ridave@redhat.com>
Wed, 13 Mar 2024 09:31:02 +0000 (15:01 +0530)
committer: Rishabh Dave <ridave@redhat.com>
Wed, 12 Jun 2024 10:21:13 +0000 (15:51 +0530)
The confirmation flag "yes_i_really_mean_it" must be passed when running
the command "ceph fs fail" if the MDS for this FS has either of the two
health warnings MDS_TRIM or MDS_CACHE_OVERSIZED. Otherwise, the command
fails and prints an appropriate error message.

Restarting an MDS with these health warnings is not recommended, since
its recovery during restart will be slow, which creates new problems.

Fixes: https://tracker.ceph.com/issues/61866
Signed-off-by: Rishabh Dave <ridave@redhat.com>
(cherry picked from commit b901616494a8359e59f7ec2cd661077c4aced01c)

Conflicts:
- src/mon/FSCommands.cc
  -  lines surrounding the patch are different in reef compared to main.
     the reef code was still accessing "mds_map" directly instead of
     accessing it using "get_mds_map()".
  - return value of get_filesystem() is different in main.

src/mon/FSCommands.cc
src/mon/MonCommands.h

index 6b02fbbaccd2cc45e95077952a19571f415b5fa3..c564226b04f5904f2bf03c8de327986219f00586 100644 (file)
@@ -116,8 +116,17 @@ class FailHandler : public FileSystemCommandHandler
 
     auto fs = fsmap.get_filesystem(fs_name);
 
-    auto f = [](auto fs) {
-      fs->mds_map.set_flag(CEPH_MDSMAP_NOT_JOINABLE);
+  bool confirm = false;
+  cmd_getval(cmdmap, "yes_i_really_mean_it", confirm);
+  if (!confirm &&
+      mon->mdsmon()->has_health_warnings({
+       MDS_HEALTH_TRIM, MDS_HEALTH_CACHE_OVERSIZED})) {
+    ss << errmsg_for_unhealthy_mds;
+    return -EPERM;
+  }
+
+    auto f = [](auto&& fs) {
+      fs->get_mds_map().set_flag(CEPH_MDSMAP_NOT_JOINABLE);
     };
     fsmap.modify_filesystem(fs->fscid, std::move(f));
 
index a6bb07132a6e70fdc06b7a00c028e0efc34733ff..151ddb83d4fd797972ce6ae9cd8cf337392e12b0 100644 (file)
@@ -354,7 +354,8 @@ COMMAND("fs new "
        "make new filesystem using named pools <metadata> and <data>",
        "fs", "rw")
 COMMAND("fs fail "
-       "name=fs_name,type=CephString ",
+       "name=fs_name,type=CephString "
+        "name=yes_i_really_mean_it,type=CephBool,req=false",
        "bring the file system down and all of its ranks",
        "fs", "rw")
 COMMAND("fs rm "