]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mds: optionally forbid to use standby for another fs as last resort
authorMykola Golub <mykola.golub@clyso.com>
Wed, 7 Jun 2023 12:57:38 +0000 (13:57 +0100)
committerMykola Golub <mgolub@suse.com>
Fri, 3 Nov 2023 16:33:25 +0000 (18:33 +0200)
Signed-off-by: Mykola Golub <mykola.golub@clyso.com>
(cherry picked from commit 386c4bbb81985322c547057daa994f15c6b5b8b9)

PendingReleaseNotes
doc/cephfs/standby.rst
src/include/ceph_fs.h
src/mds/FSMap.cc
src/mds/MDSMap.cc
src/mds/MDSMap.h
src/mon/FSCommands.cc
src/mon/MonCommands.h

index 7bf5ef7823217a59d72da230c25e34b26f6105c3..9275a882a77331e7dd6f13d45896909ff0edf491 100644 (file)
@@ -297,3 +297,8 @@ Relevant tracker: https://tracker.ceph.com/issues/57090
   key/value set for a filesystem extended attributes.  It effectively replaces
   the old per-MDS `max_xattr_pairs_size` setting, which is now dropped.
   Relevant tracker: https://tracker.ceph.com/issues/55725
+
+* Introduced a new file system flag `refuse_standby_for_another_fs` that can be
+  set using the `fs set` command. This flag prevents using a standby for another
+  file system (join_fs = X) when no standby for the current file system is available.
+  Relevant tracker: https://tracker.ceph.com/issues/61599
index 367c6762b8f094b107998ec7bc72315537940efd..e20735aaaf891d3ec7762d39d765ee3a4e87fecc 100644 (file)
@@ -118,10 +118,16 @@ enforces this affinity.
 When failing over MDS daemons, a cluster's monitors will prefer standby daemons with
 ``mds_join_fs`` equal to the file system ``name`` with the failed ``rank``.  If no
 standby exists with ``mds_join_fs`` equal to the file system ``name``, it will
-choose an unqualified standby (no setting for ``mds_join_fs``) for the replacement,
-or any other available standby, as a last resort. Note, this does not change the
-behavior that ``standby-replay`` daemons are always selected before
-other standbys.
+choose an unqualified standby (no setting for ``mds_join_fs``) for the replacement.
+As a last resort, a standby for another file system will be chosen, although this
+behavior can be disabled:
+
+::
+
+    ceph fs set <fs name> refuse_standby_for_another_fs true
+
+Note, configuring MDS file system affinity does not change the behavior that
+``standby-replay`` daemons are always selected before other standbys.
 
 Even further, the monitors will regularly examine the CephFS file systems even when
 stable to check if a standby with stronger affinity is available to replace an
index 28440c820dcfb8f17d0a04fcc301401025ec80fd..f567a26f411066f239f80bb70cb9d8eeaf9b1555 100644 (file)
@@ -290,6 +290,8 @@ struct ceph_mon_subscribe_ack {
 #define CEPH_MDSMAP_ALLOW_STANDBY_REPLAY         (1<<5)  /* cluster allowed to enable standby-replay */
 #define CEPH_MDSMAP_REFUSE_CLIENT_SESSION        (1<<6)  /* cluster allowed to refuse client session
                                                             request */
+#define CEPH_MDSMAP_REFUSE_STANDBY_FOR_ANOTHER_FS (1<<7) /* fs is forbidden to use standby
+                                                            for another fs */
 #define CEPH_MDSMAP_DEFAULTS (CEPH_MDSMAP_ALLOW_SNAPS | \
                              CEPH_MDSMAP_ALLOW_MULTIMDS_SNAPS)
 
index b9ae05ac0f62246831f30ed0d7ed6c9d0df8b1ae..e1c98be1b82e6cb999bf7a3ab168e75b79024fdf 100644 (file)
@@ -792,7 +792,8 @@ const MDSMap::mds_info_t* FSMap::get_available_standby(const Filesystem& fs) con
       break;
     } else if (info.join_fscid == FS_CLUSTER_ID_NONE) {
       who = &info; /* vanilla standby */
-    } else if (who == nullptr) {
+    } else if (who == nullptr &&
+              !fs.mds_map.test_flag(CEPH_MDSMAP_REFUSE_STANDBY_FOR_ANOTHER_FS)) {
       who = &info; /* standby for another fs, last resort */
     }
   }
index 6ea1de533de7ce9d42e6e69b75bd7ecd5cca3cab..f1613dbf323ae9c472a6ee7db6662e08bff68715 100644 (file)
@@ -236,6 +236,7 @@ void MDSMap::dump_flags_state(Formatter *f) const
     f->dump_bool(flag_display.at(CEPH_MDSMAP_ALLOW_MULTIMDS_SNAPS), allows_multimds_snaps());
     f->dump_bool(flag_display.at(CEPH_MDSMAP_ALLOW_STANDBY_REPLAY), allows_standby_replay());
     f->dump_bool(flag_display.at(CEPH_MDSMAP_REFUSE_CLIENT_SESSION), test_flag(CEPH_MDSMAP_REFUSE_CLIENT_SESSION));
+    f->dump_bool(flag_display.at(CEPH_MDSMAP_REFUSE_STANDBY_FOR_ANOTHER_FS), test_flag(CEPH_MDSMAP_REFUSE_STANDBY_FOR_ANOTHER_FS));
     f->close_section();
 }
 
@@ -378,6 +379,8 @@ void MDSMap::print_flags(std::ostream& out) const {
     out << " " << flag_display.at(CEPH_MDSMAP_ALLOW_STANDBY_REPLAY);
   if (test_flag(CEPH_MDSMAP_REFUSE_CLIENT_SESSION))
     out << " " << flag_display.at(CEPH_MDSMAP_REFUSE_CLIENT_SESSION);
+  if (test_flag(CEPH_MDSMAP_REFUSE_STANDBY_FOR_ANOTHER_FS))
+    out << " " << flag_display.at(CEPH_MDSMAP_REFUSE_STANDBY_FOR_ANOTHER_FS);
 }
 
 void MDSMap::get_health(list<pair<health_status_t,string> >& summary,
index 7e1814e5977a49e7b71b40537badd2af73665f62..52bc0fa367f087836614c739476cd9d9ba453e5b 100644 (file)
@@ -675,7 +675,8 @@ private:
     {CEPH_MDSMAP_ALLOW_SNAPS, "allow_snaps"},
     {CEPH_MDSMAP_ALLOW_MULTIMDS_SNAPS, "allow_multimds_snaps"},
     {CEPH_MDSMAP_ALLOW_STANDBY_REPLAY, "allow_standby_replay"},
-    {CEPH_MDSMAP_REFUSE_CLIENT_SESSION, "refuse_client_session"}
+    {CEPH_MDSMAP_REFUSE_CLIENT_SESSION, "refuse_client_session"},
+    {CEPH_MDSMAP_REFUSE_STANDBY_FOR_ANOTHER_FS, "refuse_standby_for_another_fs"}
   };
 };
 WRITE_CLASS_ENCODER_FEATURES(MDSMap::mds_info_t)
index df78639e09923c8a41eeb52efa0c2230fefb2f4d..3f369de19e4f525adcd980eb13d32bfa2afc49cc 100644 (file)
@@ -724,6 +724,38 @@ public:
             ss << "client(s) already allowed to establish new session(s)";
           }
       }
+    } else if (var == "refuse_standby_for_another_fs") {
+      bool refuse_standby_for_another_fs = false;
+      int r = parse_bool(val, &refuse_standby_for_another_fs, ss);
+      if (r != 0) {
+        return r;
+      }
+
+      if (refuse_standby_for_another_fs) {
+        if (!(fs->mds_map.test_flag(CEPH_MDSMAP_REFUSE_STANDBY_FOR_ANOTHER_FS))) {
+          fsmap.modify_filesystem(
+            fs->fscid,
+            [](std::shared_ptr<Filesystem> fs)
+          {
+            fs->mds_map.set_flag(CEPH_MDSMAP_REFUSE_STANDBY_FOR_ANOTHER_FS);
+          });
+          ss << "set to refuse standby for another fs";
+        } else {
+          ss << "to refuse standby for another fs is already set";
+        }
+      } else {
+          if (fs->mds_map.test_flag(CEPH_MDSMAP_REFUSE_STANDBY_FOR_ANOTHER_FS)) {
+            fsmap.modify_filesystem(
+              fs->fscid,
+              [](std::shared_ptr<Filesystem> fs)
+            {
+              fs->mds_map.clear_flag(CEPH_MDSMAP_REFUSE_STANDBY_FOR_ANOTHER_FS);
+            });
+            ss << "allowed to use standby for another fs";
+          } else {
+            ss << "to use standby for another fs is already allowed";
+          }
+      }
     } else {
       ss << "unknown variable " << var;
       return -EINVAL;
index bce63182e317e9a057eaa7482d0dadd4f17bd120..dc6dea2f9e6f6544ceb1dab15b5e620590bf64ac 100644 (file)
@@ -378,7 +378,7 @@ COMMAND("fs set "
         "|allow_new_snaps|inline_data|cluster_down|allow_dirfrags|balancer"
         "|standby_count_wanted|session_timeout|session_autoclose"
         "|allow_standby_replay|down|joinable|min_compat_client|bal_rank_mask"
-       "|refuse_client_session|max_xattr_size "
+       "|refuse_client_session|max_xattr_size|refuse_standby_for_another_fs "
        "name=val,type=CephString "
        "name=yes_i_really_mean_it,type=CephBool,req=false "
        "name=yes_i_really_really_mean_it,type=CephBool,req=false",