From 53d63e246dfd2d5930fb4a559e7352cf44478223 Mon Sep 17 00:00:00 2001 From: Rishabh Dave Date: Wed, 13 Mar 2024 15:01:02 +0530 Subject: [PATCH] cephfs,mon: require confirmation to fail unhealthy FS Confirmation flag must be passed when running the command "ceph fs fail" when the MDS for this FS has either of the two health warnings: MDS_TRIM or MDS_CACHE_OVERSIZED. Otherwise, the command will fail and print an appropriate error message. Restarting an MDS with these health warnings is not recommended since it will have a slow recovery during restart, which will create new problems. Fixes: https://tracker.ceph.com/issues/61866 Signed-off-by: Rishabh Dave (cherry picked from commit b901616494a8359e59f7ec2cd661077c4aced01c) Conflicts: - src/mon/FSCommands.cc - lines surrounding the patch are different in reef compared to main. The reef code was still accessing "mds_map" directly instead of accessing it using "get_mds_map()". - return value of get_filesystem() is different in main. --- src/mon/FSCommands.cc | 13 +++++++++++-- src/mon/MonCommands.h | 3 ++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/mon/FSCommands.cc b/src/mon/FSCommands.cc index 6b02fbbaccd..c564226b04f 100644 --- a/src/mon/FSCommands.cc +++ b/src/mon/FSCommands.cc @@ -116,8 +116,17 @@ class FailHandler : public FileSystemCommandHandler auto fs = fsmap.get_filesystem(fs_name); - auto f = [](auto fs) { - fs->mds_map.set_flag(CEPH_MDSMAP_NOT_JOINABLE); + bool confirm = false; + cmd_getval(cmdmap, "yes_i_really_mean_it", confirm); + if (!confirm && + mon->mdsmon()->has_health_warnings({ + MDS_HEALTH_TRIM, MDS_HEALTH_CACHE_OVERSIZED})) { + ss << errmsg_for_unhealthy_mds; + return -EPERM; + } + + auto f = [](auto&& fs) { + fs->get_mds_map().set_flag(CEPH_MDSMAP_NOT_JOINABLE); }; fsmap.modify_filesystem(fs->fscid, std::move(f)); diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h index a6bb07132a6..151ddb83d4f 100644 --- a/src/mon/MonCommands.h +++ b/src/mon/MonCommands.h @@ -354,7 
+354,8 @@ COMMAND("fs new " "make new filesystem using named pools and ", "fs", "rw") COMMAND("fs fail " - "name=fs_name,type=CephString ", + "name=fs_name,type=CephString " + "name=yes_i_really_mean_it,type=CephBool,req=false", "bring the file system down and all of its ranks", "fs", "rw") COMMAND("fs rm " -- 2.39.5