From: Patrick Donnelly
Date: Tue, 12 Dec 2017 19:09:19 +0000 (-0800)
Subject: mds: obsolete MDSMap option configs
X-Git-Tag: v12.2.11~41^2
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F25431%2Fhead;p=ceph.git

mds: obsolete MDSMap option configs

These configs were used for initialization but it is more appropriate to
require setting these file system attributes via `ceph fs set`. This is
similar to what was already done with max_mds. There are new variables
added for `fs set` where missing.

Signed-off-by: Patrick Donnelly
(cherry picked from commit 67ca6cd229a595d54ccea18b5452f2574ede9657)

Fixes: http://tracker.ceph.com/issues/37540

Conflicts:
    PendingReleaseNotes
    qa/tasks/cephfs/filesystem.py
    qa/tasks/cephfs/test_strays.py
    src/common/legacy_config_opts.h
    src/mds/FSMap.cc
    src/mds/Locker.cc
    src/mds/MDBalancer.cc
    src/mds/MDSMap.h
    src/mds/Server.cc
---
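For context, the workflow this change standardizes on can be sketched as
follows (Python; a rough sketch only -- it assumes the `ceph` CLI is on PATH,
a file system named `cephfs`, and JSON output from `ceph fs get`, none of
which this patch itself establishes):

    import json
    import subprocess

    def fs_set(fs_name, var, value):
        # The interface this commit requires for session_timeout,
        # session_autoclose and max_file_size, as for max_mds before it.
        subprocess.check_call(["ceph", "fs", "set", fs_name, var, str(value)])

    def fs_get(fs_name, var):
        # The attributes now live in the MDSMap, so read them back from the
        # file system map rather than from the MDS configuration.
        out = subprocess.check_output(["ceph", "fs", "get", fs_name,
                                       "--format=json"])
        return json.loads(out)["mdsmap"][var]

    fs_set("cephfs", "session_timeout", 120)    # was mds_session_timeout
    fs_set("cephfs", "session_autoclose", 600)  # was mds_session_autoclose
    print(fs_get("cephfs", "session_timeout"))
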
diff --git a/PendingReleaseNotes b/PendingReleaseNotes
index 46eddc7d183c..ea59be4ab1b3 100644
--- a/PendingReleaseNotes
+++ b/PendingReleaseNotes
@@ -144,3 +144,7 @@
   additional 500 MB to 1 GB of RAM for large clusters, and much less for
   small clusters.
 
+* New CephFS file system attributes session_timeout and session_autoclose
+  are configurable via `ceph fs set`. The MDS config options
+  mds_session_timeout, mds_session_autoclose, and mds_max_file_size are now
+  obsolete.
diff --git a/doc/cephfs/dirfrags.rst b/doc/cephfs/dirfrags.rst
index 717553fea9af..24b05edfc268 100644
--- a/doc/cephfs/dirfrags.rst
+++ b/doc/cephfs/dirfrags.rst
@@ -25,10 +25,9 @@ fragments may be *merged* to reduce the number of fragments in the directory.
 Splitting and merging
 =====================
 
-An MDS will only consider doing splits and merges if the ``mds_bal_frag``
-setting is true in the MDS's configuration file, and the allow_dirfrags
-setting is true in the filesystem map (set on the mons). These settings
-are both true by default since the *Luminous* (12.2.x) release of Ceph.
+An MDS will only consider doing splits if the allow_dirfrags setting is true in
+the file system map (set on the mons). This setting is true by default since
+the *Luminous* release (12.2.X).
 
 When an MDS identifies a directory fragment to be split, it does not do the
 split immediately. Because splitting interrupts metadata IO,
diff --git a/doc/cephfs/eviction.rst b/doc/cephfs/eviction.rst
index 8f0f20b8448c..e803da179c2a 100644
--- a/doc/cephfs/eviction.rst
+++ b/doc/cephfs/eviction.rst
@@ -23,9 +23,9 @@ Automatic client eviction
 
 There are three situations in which a client may be evicted automatically:
 
-On an active MDS daemon, if a client has not communicated with the MDS for
-over ``mds_session_autoclose`` seconds (300 seconds by default), then it
-will be evicted automatically.
+On an active MDS daemon, if a client has not communicated with the MDS for over
+``session_autoclose`` (a file system variable) seconds (300 seconds by
+default), then it will be evicted automatically.
 
 On an active MDS daemon, if a client has not responded to cap revoke messages
 for over ``mds_cap_revoke_eviction_timeout`` (configuration option) seconds.
diff --git a/doc/cephfs/health-messages.rst b/doc/cephfs/health-messages.rst
index 7b82c2f87633..3a6217c7b690 100644
--- a/doc/cephfs/health-messages.rst
+++ b/doc/cephfs/health-messages.rst
@@ -67,7 +67,7 @@
 are like locks. Sometimes, for example when another client needs access,
 the MDS will request clients release their capabilities. If the client
 is unresponsive or buggy, it might fail to do so promptly or fail to
 do so at all. This message appears if a client has taken longer than
-``mds_session_timeout`` (default 60s) to comply.
+``session_timeout`` (default 60s) to comply.
 
 Message: "Client *name* failing to respond to cache pressure"
 Code: MDS_HEALTH_CLIENT_RECALL, MDS_HEALTH_CLIENT_RECALL_MANY
diff --git a/doc/cephfs/mds-config-ref.rst b/doc/cephfs/mds-config-ref.rst
index 2fd47ae334f8..70a97c90f5b6 100644
--- a/doc/cephfs/mds-config-ref.rst
+++ b/doc/cephfs/mds-config-ref.rst
@@ -10,15 +10,6 @@
 :Type:  Boolean
 :Default: ``true``
 
-
-``mds max file size``
-
-:Description: The maximum allowed file size to set when creating a
-              new file system.
-
-:Type:  64-bit Integer Unsigned
-:Default:  ``1ULL << 40``
-
 ``mds cache memory limit``
 
 :Description: The memory limit the MDS should enforce for its cache.
@@ -102,24 +93,6 @@
 :Default: ``24.0*60.0``
 
 
-``mds session timeout``
-
-:Description: The interval (in seconds) of client inactivity before Ceph
-              times out capabilities and leases.
-
-:Type:  Float
-:Default: ``60``
-
-
-``mds session autoclose``
-
-:Description: The interval (in seconds) before Ceph closes
-              a laggy client's session.
-
-:Type:  Float
-:Default: ``300``
-
-
 ``mds reconnect timeout``
 
 :Description: The interval (in seconds) to wait for clients to reconnect
@@ -249,13 +222,6 @@
 :Default: ``0``
 
 
-``mds bal frag``
-
-:Description: Determines whether the MDS will fragment directories.
-:Type:  Boolean
-:Default:  ``false``
-
-
 ``mds bal split size``
 
 :Description: The maximum directory size before the MDS will split a directory
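The relationship between the two renamed attributes, as described in the docs
above, can be modeled roughly like this (a sketch of the documented thresholds
only, not the actual MDS code; the defaults are the documented 60s and 300s):

    def session_state(idle_seconds, session_timeout=60.0,
                      session_autoclose=300.0):
        # Past session_timeout, caps and leases time out and the session
        # goes stale; past session_autoclose, the MDS evicts the client.
        if idle_seconds >= session_autoclose:
            return "evicted"
        if idle_seconds >= session_timeout:
            return "stale"
        return "open"

    assert session_state(30.0) == "open"
    assert session_state(90.0) == "stale"
    assert session_state(400.0) == "evicted"
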
diff --git a/qa/tasks/cephfs/filesystem.py b/qa/tasks/cephfs/filesystem.py
index b045e4d1112c..19883c306098 100644
--- a/qa/tasks/cephfs/filesystem.py
+++ b/qa/tasks/cephfs/filesystem.py
@@ -444,10 +444,10 @@ class Filesystem(MDSCluster):
         self.mon_manager.raw_cluster_cmd("fs", "set", self.name, var, *a)
 
     def set_max_mds(self, max_mds):
-        self.mon_manager.raw_cluster_cmd("fs", "set", self.name, "max_mds", "%d" % max_mds)
+        self.set_var("max_mds", "%d" % max_mds)
 
     def set_allow_dirfrags(self, yes):
-        self.mon_manager.raw_cluster_cmd("fs", "set", self.name, "allow_dirfrags", str(yes).lower(), '--yes-i-really-mean-it')
+        self.set_var("allow_dirfrags", str(yes).lower(), '--yes-i-really-mean-it')
 
     def get_pgs_per_fs_pool(self):
         """
diff --git a/qa/tasks/cephfs/test_client_limits.py b/qa/tasks/cephfs/test_client_limits.py
index b06d5123d8f2..1f1d5467079c 100644
--- a/qa/tasks/cephfs/test_client_limits.py
+++ b/qa/tasks/cephfs/test_client_limits.py
@@ -134,10 +134,10 @@ class TestClientLimits(CephFSTestCase):
         # Client B tries to stat the file that client A created
         rproc = self.mount_b.write_background("file1")
 
-        # After mds_session_timeout, we should see a health warning (extra lag from
+        # After session_timeout, we should see a health warning (extra lag from
         # MDS beacon period)
-        mds_session_timeout = float(self.fs.get_config("mds_session_timeout"))
-        self.wait_for_health("MDS_CLIENT_LATE_RELEASE", mds_session_timeout + 10)
+        session_timeout = self.fs.get_var("session_timeout")
+        self.wait_for_health("MDS_CLIENT_LATE_RELEASE", session_timeout + 10)
 
         # Client B should still be stuck
         self.assertFalse(rproc.finished)
diff --git a/qa/tasks/cephfs/test_client_recovery.py b/qa/tasks/cephfs/test_client_recovery.py
index 829ca3d5c6a8..2b91cbfe6a1a 100644
--- a/qa/tasks/cephfs/test_client_recovery.py
+++ b/qa/tasks/cephfs/test_client_recovery.py
@@ -30,10 +30,9 @@ class TestClientNetworkRecovery(CephFSTestCase):
     REQUIRE_ONE_CLIENT_REMOTE = True
     CLIENTS_REQUIRED = 2
 
-    LOAD_SETTINGS = ["mds_session_timeout", "mds_reconnect_timeout", "ms_max_backoff"]
+    LOAD_SETTINGS = ["mds_reconnect_timeout", "ms_max_backoff"]
 
     # Environment references
-    mds_session_timeout = None
     mds_reconnect_timeout = None
     ms_max_backoff = None
 
@@ -45,6 +44,8 @@ class TestClientNetworkRecovery(CephFSTestCase):
         I/O after failure.
         """
 
+        session_timeout = self.fs.get_var("session_timeout")
+
         # We only need one client
         self.mount_b.umount_wait()
 
@@ -67,7 +68,7 @@
         # ...then it should block
         self.assertFalse(write_blocked.finished)
         self.assert_session_state(client_id, "open")
-        time.sleep(self.mds_session_timeout * 1.5)  # Long enough for MDS to consider session stale
+        time.sleep(session_timeout * 1.5)  # Long enough for MDS to consider session stale
         self.assertFalse(write_blocked.finished)
         self.assert_session_state(client_id, "stale")
 
@@ -87,10 +88,9 @@ class TestClientRecovery(CephFSTestCase):
     REQUIRE_KCLIENT_REMOTE = True
     CLIENTS_REQUIRED = 2
 
-    LOAD_SETTINGS = ["mds_session_timeout", "mds_reconnect_timeout", "ms_max_backoff"]
+    LOAD_SETTINGS = ["mds_reconnect_timeout", "ms_max_backoff"]
 
     # Environment references
-    mds_session_timeout = None
    mds_reconnect_timeout = None
     ms_max_backoff = None
 
@@ -214,6 +214,8 @@ class TestClientRecovery(CephFSTestCase):
         self.mount_a.create_destroy()
 
     def test_stale_caps(self):
+        session_timeout = self.fs.get_var("session_timeout")
+
         # Capability release from stale session
         # =====================================
         cap_holder = self.mount_a.open_background()
@@ -226,7 +228,7 @@
         self.mount_a.kill()
 
         try:
-            # Now, after mds_session_timeout seconds, the waiter should
+            # Now, after session_timeout seconds, the waiter should
             # complete their operation when the MDS marks the holder's
             # session stale.
             cap_waiter = self.mount_b.write_background()
@@ -239,9 +241,9 @@
             cap_waited = b - a
             log.info("cap_waiter waited {0}s".format(cap_waited))
-            self.assertTrue(self.mds_session_timeout / 2.0 <= cap_waited <= self.mds_session_timeout * 2.0,
+            self.assertTrue(session_timeout / 2.0 <= cap_waited <= session_timeout * 2.0,
                             "Capability handover took {0}, expected approx {1}".format(
-                                cap_waited, self.mds_session_timeout
+                                cap_waited, session_timeout
                             ))
 
             cap_holder.stdin.close()
@@ -261,6 +263,8 @@
         # Eviction while holding a capability
         # ===================================
 
+        session_timeout = self.fs.get_var("session_timeout")
+
         # Take out a write capability on a file on client A,
         # and then immediately kill it.
         cap_holder = self.mount_a.open_background()
@@ -290,9 +294,9 @@
             log.info("cap_waiter waited {0}s".format(cap_waited))
             # This is the check that it happened 'now' rather than waiting
             # for the session timeout
-            self.assertLess(cap_waited, self.mds_session_timeout / 2.0,
+            self.assertLess(cap_waited, session_timeout / 2.0,
                             "Capability handover took {0}, expected less than {1}".format(
-                                cap_waited, self.mds_session_timeout / 2.0
+                                cap_waited, session_timeout / 2.0
                             ))
 
             cap_holder.stdin.close()
@@ -479,6 +483,8 @@
         if not isinstance(self.mount_a, FuseMount):
             raise SkipTest("Require FUSE client to handle signal STOP/CONT")
 
+        session_timeout = self.fs.get_var("session_timeout")
+
         self.mount_a.run_shell(["mkdir", "testdir"])
         self.mount_a.run_shell(["touch", "testdir/file1"])
         # populate readdir cache
@@ -497,7 +503,7 @@
             self.mount_b.client_remote.run(args=["sudo", "kill", "-STOP", mount_b_pid])
 
         self.assert_session_state(mount_b_gid, "open")
-        time.sleep(self.mds_session_timeout * 1.5)  # Long enough for MDS to consider session stale
+        time.sleep(session_timeout * 1.5)  # Long enough for MDS to consider session stale
         self.assert_session_state(mount_b_gid, "stale")
 
         self.mount_a.run_shell(["touch", "testdir/file2"])
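The tests above observe session state through the qa helpers
(`self.fs.get_var`, `assert_session_state`, `mds_asok`). Outside the
framework, the same observation can be sketched via the MDS admin socket (a
hypothetical helper; it assumes `ceph daemon` access to a local MDS and the
`session ls` output fields of this era):

    import json
    import subprocess

    def session_states(mds_id):
        # Same data the tests fetch with fs.mds_asok(['session', 'ls']):
        # one entry per client session, keyed here by client id.
        out = subprocess.check_output(
            ["ceph", "daemon", "mds.{0}".format(mds_id), "session", "ls"])
        return {s["id"]: s["state"] for s in json.loads(out)}

    # A client idle past session_timeout should move from "open" to "stale",
    # e.g. {4305: "open"} -> {4305: "stale"}.
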
diff --git a/qa/tasks/cephfs/test_fragment.py b/qa/tasks/cephfs/test_fragment.py
index a62ef7432164..54a49cea2fc6 100644
--- a/qa/tasks/cephfs/test_fragment.py
+++ b/qa/tasks/cephfs/test_fragment.py
@@ -33,7 +33,6 @@ class TestFragmentation(CephFSTestCase):
         Apply kwargs as MDS configuration settings, enable dirfrags
         and restart the MDSs.
         """
-        kwargs['mds_bal_frag'] = "true"
 
         for k, v in kwargs.items():
             self.ceph_cluster.set_ceph_conf("mds", k, v.__str__())
diff --git a/qa/tasks/cephfs/test_misc.py b/qa/tasks/cephfs/test_misc.py
index cc5f674cf5eb..90ba94161ca4 100644
--- a/qa/tasks/cephfs/test_misc.py
+++ b/qa/tasks/cephfs/test_misc.py
@@ -13,9 +13,6 @@ log = logging.getLogger(__name__)
 class TestMisc(CephFSTestCase):
     CLIENTS_REQUIRED = 2
 
-    LOAD_SETTINGS = ["mds_session_autoclose"]
-    mds_session_autoclose = None
-
     def test_getattr_caps(self):
         """
         Check if MDS recognizes the 'mask' parameter of open request.
@@ -106,6 +103,8 @@ class TestMisc(CephFSTestCase):
         only session
         """
 
+        session_autoclose = self.fs.get_var("session_autoclose")
+
         self.mount_b.umount_wait()
         ls_data = self.fs.mds_asok(['session', 'ls'])
         self.assert_session_count(1, ls_data)
 
         self.mount_a.kill()
         self.mount_a.kill_cleanup()
 
-        time.sleep(self.mds_session_autoclose * 1.5)
+        time.sleep(session_autoclose * 1.5)
         ls_data = self.fs.mds_asok(['session', 'ls'])
         self.assert_session_count(1, ls_data)
@@ -128,7 +127,7 @@
         self.mount_a.kill()
         self.mount_a.kill_cleanup()
 
-        time.sleep(self.mds_session_autoclose * 1.5)
+        time.sleep(session_autoclose * 1.5)
         ls_data = self.fs.mds_asok(['session', 'ls'])
         self.assert_session_count(1, ls_data)
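Since the hunks below delete the legacy option definitions, the operator-side
migration reduces to a small mapping (an illustrative sketch; the fs name and
the `migrate` helper are mine, and only the three options named in the release
note are covered):

    # Obsolete [mds] config options -> 'ceph fs set' variables.
    OBSOLETE_TO_FS_VAR = {
        "mds_session_timeout": "session_timeout",
        "mds_session_autoclose": "session_autoclose",
        "mds_max_file_size": "max_file_size",
    }

    def migrate(conf_options, fs_name="cephfs"):
        # Given {option: value} from an old ceph.conf, print the equivalent
        # 'ceph fs set' commands; unrelated options are left alone.
        for opt, val in conf_options.items():
            var = OBSOLETE_TO_FS_VAR.get(opt)
            if var is not None:
                print("ceph fs set {0} {1} {2}".format(fs_name, var, val))

    migrate({"mds_session_timeout": "120",
             "mds_cache_memory_limit": "4294967296"})
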
diff --git a/src/common/legacy_config_opts.h b/src/common/legacy_config_opts.h
index 291fbb939485..9e71c453b234 100644
--- a/src/common/legacy_config_opts.h
+++ b/src/common/legacy_config_opts.h
@@ -429,7 +429,6 @@ OPTION(journaler_write_head_interval, OPT_INT)
 OPTION(journaler_prefetch_periods, OPT_INT)  // * journal object size
 OPTION(journaler_prezero_periods, OPT_INT)   // * journal object size
 OPTION(mds_data, OPT_STR)
-OPTION(mds_max_file_size, OPT_U64) // Used when creating new CephFS. Change with 'ceph mds set max_file_size <size>' afterwards
 // max xattr kv pairs size for each dir/file
 OPTION(mds_max_xattr_pairs_size, OPT_U32)
 OPTION(mds_max_file_recover, OPT_U32)
@@ -440,17 +439,15 @@ OPTION(mds_beacon_interval, OPT_FLOAT)
 OPTION(mds_beacon_grace, OPT_FLOAT)
 OPTION(mds_enforce_unique_name, OPT_BOOL)
-OPTION(mds_session_timeout, OPT_FLOAT)    // cap bits and leases time out if client unresponsive or not returning its caps
 OPTION(mds_session_blacklist_on_timeout, OPT_BOOL)    // whether to blacklist clients whose sessions are dropped due to timeout
 OPTION(mds_session_blacklist_on_evict, OPT_BOOL)  // whether to blacklist clients whose sessions are dropped via admin commands
 OPTION(mds_sessionmap_keys_per_op, OPT_U32)   // how many sessions should I try to load/store in a single OMAP operation?
 OPTION(mds_recall_state_timeout, OPT_FLOAT)    // detect clients which aren't trimming caps
 OPTION(mds_freeze_tree_timeout, OPT_FLOAT)    // detecting freeze tree deadlock
-OPTION(mds_session_autoclose, OPT_FLOAT)  // autoclose idle session
 OPTION(mds_health_summarize_threshold, OPT_INT) // collapse N-client health metrics to a single 'many'
 OPTION(mds_reconnect_timeout, OPT_FLOAT)  // seconds to wait for clients during mds restart
-                                          //  make it (mds_session_timeout - mds_beacon_grace)
+                                          //  make it (mdsmap.session_timeout - mds_beacon_grace)
 OPTION(mds_tick_interval, OPT_FLOAT)
 OPTION(mds_dirstat_min_interval, OPT_FLOAT)    // try to avoid propagating more often than this
 OPTION(mds_scatter_nudge_interval, OPT_FLOAT)  // how quickly dirstat changes propagate up the hierarchy
@@ -467,7 +464,6 @@ OPTION(mds_bal_export_pin, OPT_BOOL)  // allow clients to pin directory trees to
 OPTION(mds_bal_sample_interval, OPT_DOUBLE)  // every 3 seconds
 OPTION(mds_bal_replicate_threshold, OPT_FLOAT)
 OPTION(mds_bal_unreplicate_threshold, OPT_FLOAT)
-OPTION(mds_bal_frag, OPT_BOOL)
 OPTION(mds_bal_split_size, OPT_INT)
 OPTION(mds_bal_split_rd, OPT_FLOAT)
 OPTION(mds_bal_split_wr, OPT_FLOAT)
diff --git a/src/common/options.cc b/src/common/options.cc
index 05eb0cbef997..7cf12a2a6a11 100644
--- a/src/common/options.cc
+++ b/src/common/options.cc
@@ -6056,10 +6056,6 @@ std::vector