From: Rishabh Dave Date: Wed, 13 Mar 2024 09:31:02 +0000 (+0530) Subject: cephfs,mon: require confirmation to fail unhealthy FS X-Git-Tag: v18.2.5~619^2~6 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=53d63e246dfd2d5930fb4a559e7352cf44478223;p=ceph.git cephfs,mon: require confirmation to fail unhealthy FS Confirmation flag must be passed when running the command "ceph fs fail" when the MDS for this FS has either of the two health warnings: MDS_TRIM or MDS_CACHE_OVERSIZED. Else, the command will fail and print an appropriate error message. Restarting an MDS with these health warnings is not recommended since it will have a slow recovery during restart which will create new problems. Fixes: https://tracker.ceph.com/issues/61866 Signed-off-by: Rishabh Dave (cherry picked from commit b901616494a8359e59f7ec2cd661077c4aced01c) Conflicts: - src/mon/FSCommands.cc - lines surrounding the patch are different in reef compared to main. the reef code was still accessing "mds_map" directly instead of accessing it using "get_mds_map()". - return value of get_filesystem() is different in main. 
--- diff --git a/src/mon/FSCommands.cc b/src/mon/FSCommands.cc index 6b02fbbaccd2..c564226b04f5 100644 --- a/src/mon/FSCommands.cc +++ b/src/mon/FSCommands.cc @@ -116,8 +116,17 @@ class FailHandler : public FileSystemCommandHandler auto fs = fsmap.get_filesystem(fs_name); - auto f = [](auto fs) { - fs->mds_map.set_flag(CEPH_MDSMAP_NOT_JOINABLE); + bool confirm = false; + cmd_getval(cmdmap, "yes_i_really_mean_it", confirm); + if (!confirm && + mon->mdsmon()->has_health_warnings({ + MDS_HEALTH_TRIM, MDS_HEALTH_CACHE_OVERSIZED})) { + ss << errmsg_for_unhealthy_mds; + return -EPERM; + } + + auto f = [](auto&& fs) { + fs->get_mds_map().set_flag(CEPH_MDSMAP_NOT_JOINABLE); }; fsmap.modify_filesystem(fs->fscid, std::move(f)); diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h index a6bb07132a6e..151ddb83d4fd 100644 --- a/src/mon/MonCommands.h +++ b/src/mon/MonCommands.h @@ -354,7 +354,8 @@ COMMAND("fs new " "make new filesystem using named pools and ", "fs", "rw") COMMAND("fs fail " - "name=fs_name,type=CephString ", + "name=fs_name,type=CephString " + "name=yes_i_really_mean_it,type=CephBool,req=false", "bring the file system down and all of its ranks", "fs", "rw") COMMAND("fs rm "