From c7c88479a1ae5ddf82f501f77226e357a12af5d7 Mon Sep 17 00:00:00 2001 From: Mykola Golub Date: Wed, 7 Jun 2023 13:57:38 +0100 Subject: [PATCH] mds: optionally forbid to use standby for another fs as last resort Signed-off-by: Mykola Golub (cherry picked from commit 386c4bbb81985322c547057daa994f15c6b5b8b9) --- PendingReleaseNotes | 5 +++++ doc/cephfs/standby.rst | 14 ++++++++++---- src/include/ceph_fs.h | 2 ++ src/mds/FSMap.cc | 3 ++- src/mds/MDSMap.cc | 3 +++ src/mds/MDSMap.h | 3 ++- src/mon/FSCommands.cc | 32 ++++++++++++++++++++++++++++++++ src/mon/MonCommands.h | 2 +- 8 files changed, 57 insertions(+), 7 deletions(-) diff --git a/PendingReleaseNotes b/PendingReleaseNotes index 7bf5ef78232..9275a882a77 100644 --- a/PendingReleaseNotes +++ b/PendingReleaseNotes @@ -297,3 +297,8 @@ Relevant tracker: https://tracker.ceph.com/issues/57090 key/value set for a filesystem extended attributes. It effectively replaces the old per-MDS `max_xattr_pairs_size` setting, which is now dropped. Relevant tracker: https://tracker.ceph.com/issues/55725 + +* Introduced a new file system flag `refuse_standby_for_another_fs` that can be +set using the `fs set` command. This flag prevents using a standby for another +file system (join_fs = X) when standby for the current filesystem is not available. +Relevant tracker: https://tracker.ceph.com/issues/61599 diff --git a/doc/cephfs/standby.rst b/doc/cephfs/standby.rst index 367c6762b8f..e20735aaaf8 100644 --- a/doc/cephfs/standby.rst +++ b/doc/cephfs/standby.rst @@ -118,10 +118,16 @@ enforces this affinity. When failing over MDS daemons, a cluster's monitors will prefer standby daemons with ``mds_join_fs`` equal to the file system ``name`` with the failed ``rank``. If no standby exists with ``mds_join_fs`` equal to the file system ``name``, it will -choose an unqualified standby (no setting for ``mds_join_fs``) for the replacement, -or any other available standby, as a last resort. 
Note, this does not change the -behavior that ``standby-replay`` daemons are always selected before -other standbys. +choose an unqualified standby (no setting for ``mds_join_fs``) for the replacement. +As a last resort, a standby for another filesystem will be chosen, although this +behavior can be disabled: + +:: + + ceph fs set <fs name> refuse_standby_for_another_fs true + +Note, configuring MDS file system affinity does not change the behavior that +``standby-replay`` daemons are always selected before other standbys. Even further, the monitors will regularly examine the CephFS file systems even when stable to check if a standby with stronger affinity is available to replace an diff --git a/src/include/ceph_fs.h b/src/include/ceph_fs.h index 28440c820dc..f567a26f411 100644 --- a/src/include/ceph_fs.h +++ b/src/include/ceph_fs.h @@ -290,6 +290,8 @@ struct ceph_mon_subscribe_ack { #define CEPH_MDSMAP_ALLOW_STANDBY_REPLAY (1<<5) /* cluster alllowed to enable MULTIMDS */ #define CEPH_MDSMAP_REFUSE_CLIENT_SESSION (1<<6) /* cluster allowed to refuse client session request */ +#define CEPH_MDSMAP_REFUSE_STANDBY_FOR_ANOTHER_FS (1<<7) /* fs is forbidden to use standby + for another fs */ #define CEPH_MDSMAP_DEFAULTS (CEPH_MDSMAP_ALLOW_SNAPS | \ CEPH_MDSMAP_ALLOW_MULTIMDS_SNAPS) diff --git a/src/mds/FSMap.cc b/src/mds/FSMap.cc index b9ae05ac0f6..e1c98be1b82 100644 --- a/src/mds/FSMap.cc +++ b/src/mds/FSMap.cc @@ -792,7 +792,8 @@ const MDSMap::mds_info_t* FSMap::get_available_standby(const Filesystem& fs) con break; } else if (info.join_fscid == FS_CLUSTER_ID_NONE) { who = &info; /* vanilla standby */ - } else if (who == nullptr) { + } else if (who == nullptr && + !fs.mds_map.test_flag(CEPH_MDSMAP_REFUSE_STANDBY_FOR_ANOTHER_FS)) { who = &info; /* standby for another fs, last resort */ } } diff --git a/src/mds/MDSMap.cc b/src/mds/MDSMap.cc index 6ea1de533de..f1613dbf323 100644 --- a/src/mds/MDSMap.cc +++ b/src/mds/MDSMap.cc @@ -236,6 +236,7 @@ void
MDSMap::dump_flags_state(Formatter *f) const f->dump_bool(flag_display.at(CEPH_MDSMAP_ALLOW_MULTIMDS_SNAPS), allows_multimds_snaps()); f->dump_bool(flag_display.at(CEPH_MDSMAP_ALLOW_STANDBY_REPLAY), allows_standby_replay()); f->dump_bool(flag_display.at(CEPH_MDSMAP_REFUSE_CLIENT_SESSION), test_flag(CEPH_MDSMAP_REFUSE_CLIENT_SESSION)); + f->dump_bool(flag_display.at(CEPH_MDSMAP_REFUSE_STANDBY_FOR_ANOTHER_FS), test_flag(CEPH_MDSMAP_REFUSE_STANDBY_FOR_ANOTHER_FS)); f->close_section(); } @@ -378,6 +379,8 @@ void MDSMap::print_flags(std::ostream& out) const { out << " " << flag_display.at(CEPH_MDSMAP_ALLOW_STANDBY_REPLAY); if (test_flag(CEPH_MDSMAP_REFUSE_CLIENT_SESSION)) out << " " << flag_display.at(CEPH_MDSMAP_REFUSE_CLIENT_SESSION); + if (test_flag(CEPH_MDSMAP_REFUSE_STANDBY_FOR_ANOTHER_FS)) + out << " " << flag_display.at(CEPH_MDSMAP_REFUSE_STANDBY_FOR_ANOTHER_FS); } void MDSMap::get_health(list >& summary, diff --git a/src/mds/MDSMap.h b/src/mds/MDSMap.h index 7e1814e5977..52bc0fa367f 100644 --- a/src/mds/MDSMap.h +++ b/src/mds/MDSMap.h @@ -675,7 +675,8 @@ private: {CEPH_MDSMAP_ALLOW_SNAPS, "allow_snaps"}, {CEPH_MDSMAP_ALLOW_MULTIMDS_SNAPS, "allow_multimds_snaps"}, {CEPH_MDSMAP_ALLOW_STANDBY_REPLAY, "allow_standby_replay"}, - {CEPH_MDSMAP_REFUSE_CLIENT_SESSION, "refuse_client_session"} + {CEPH_MDSMAP_REFUSE_CLIENT_SESSION, "refuse_client_session"}, + {CEPH_MDSMAP_REFUSE_STANDBY_FOR_ANOTHER_FS, "refuse_standby_for_another_fs"} }; }; WRITE_CLASS_ENCODER_FEATURES(MDSMap::mds_info_t) diff --git a/src/mon/FSCommands.cc b/src/mon/FSCommands.cc index df78639e099..3f369de19e4 100644 --- a/src/mon/FSCommands.cc +++ b/src/mon/FSCommands.cc @@ -724,6 +724,38 @@ public: ss << "client(s) already allowed to establish new session(s)"; } } + } else if (var == "refuse_standby_for_another_fs") { + bool refuse_standby_for_another_fs = false; + int r = parse_bool(val, &refuse_standby_for_another_fs, ss); + if (r != 0) { + return r; + } + + if (refuse_standby_for_another_fs) { + if 
(!(fs->mds_map.test_flag(CEPH_MDSMAP_REFUSE_STANDBY_FOR_ANOTHER_FS))) { + fsmap.modify_filesystem( + fs->fscid, + [](std::shared_ptr fs) + { + fs->mds_map.set_flag(CEPH_MDSMAP_REFUSE_STANDBY_FOR_ANOTHER_FS); + }); + ss << "set to refuse standby for another fs"; + } else { + ss << "to refuse standby for another fs is already set"; + } + } else { + if (fs->mds_map.test_flag(CEPH_MDSMAP_REFUSE_STANDBY_FOR_ANOTHER_FS)) { + fsmap.modify_filesystem( + fs->fscid, + [](std::shared_ptr fs) + { + fs->mds_map.clear_flag(CEPH_MDSMAP_REFUSE_STANDBY_FOR_ANOTHER_FS); + }); + ss << "allowed to use standby for another fs"; + } else { + ss << "to use standby for another fs is already allowed"; + } + } } else { ss << "unknown variable " << var; return -EINVAL; diff --git a/src/mon/MonCommands.h b/src/mon/MonCommands.h index bce63182e31..dc6dea2f9e6 100644 --- a/src/mon/MonCommands.h +++ b/src/mon/MonCommands.h @@ -378,7 +378,7 @@ COMMAND("fs set " "|allow_new_snaps|inline_data|cluster_down|allow_dirfrags|balancer" "|standby_count_wanted|session_timeout|session_autoclose" "|allow_standby_replay|down|joinable|min_compat_client|bal_rank_mask" - "|refuse_client_session|max_xattr_size " + "|refuse_client_session|max_xattr_size|refuse_standby_for_another_fs " "name=val,type=CephString " "name=yes_i_really_mean_it,type=CephBool,req=false " "name=yes_i_really_really_mean_it,type=CephBool,req=false", -- 2.39.5