- mds remove_data_pool -> fs rm_data_pool
- mds rm_data_pool -> fs rm_data_pool
+ * New CephFS file system attributes session_timeout and session_autoclose
+ are configurable via ``ceph fs set``. The MDS config options
+ mds_session_timeout, mds_session_autoclose, and mds_max_file_size are now
+ obsolete.
+
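As a sketch of the operator-facing side of this change (``cephfs`` is a placeholder file system name and 120s an arbitrary value), the attribute is now set on the file system rather than in the MDS config::

    import subprocess

    # session_timeout is a per-file-system attribute after this change;
    # the mons require a value of at least 30 seconds.
    subprocess.check_call(
        ["ceph", "fs", "set", "cephfs", "session_timeout", "120"])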
>= 12.2.2
---------
Splitting and merging
=====================
-An MDS will only consider doing splits and merges if the ``mds_bal_frag``
-setting is true in the MDS's configuration file, and the allow_dirfrags
-setting is true in the filesystem map (set on the mons). These settings
-are both true by default since the *Luminous* (12.2.x) release of Ceph.
+An MDS will only consider doing splits and merges if the ``allow_dirfrags``
+setting is true in the file system map (set on the mons). This setting is true
+by default since the *Luminous* release (12.2.x).
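As a minimal sketch (``cephfs`` is a hypothetical file system name), the map flag can be flipped with the same command the qa wrapper later in this change issues::

    import subprocess

    def set_allow_dirfrags(fs_name, enable):
        # Toggle the allow_dirfrags flag in the file system map held by the mons.
        subprocess.check_call([
            "ceph", "fs", "set", fs_name, "allow_dirfrags",
            str(enable).lower(), "--yes-i-really-mean-it",
        ])

    set_allow_dirfrags("cephfs", True)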
When an MDS identifies a directory fragment to be split, it does not
do the split immediately. Because splitting interrupts metadata IO,
There are two situations in which a client may be evicted automatically:
-On an active MDS daemon, if a client has not communicated with the MDS for
-over ``mds_session_autoclose`` seconds (300 seconds by default), then it
-will be evicted automatically.
+On an active MDS daemon, if a client has not communicated with the MDS for more
+than ``session_autoclose`` seconds (a file system setting, 300 seconds by
+default), then it will be evicted automatically.
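Operators who need a longer idle window before automatic eviction can raise the attribute on the file system itself; a hedged sketch (``cephfs`` and 600s are placeholders)::

    import subprocess

    # Lengthen the idle period allowed before the MDS evicts a silent client.
    subprocess.check_call(
        ["ceph", "fs", "set", "cephfs", "session_autoclose", "600"])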
During MDS startup (including on failover), the MDS passes through a
state called ``reconnect``. During this state, it waits for all the
the MDS will request clients release their capabilities. If the client
is unresponsive or buggy, it might fail to do so promptly or fail to do
so at all. This message appears if a client has taken longer than
-``mds_session_timeout`` (default 60s) to comply.
+``session_timeout`` (a file system setting, default 60s) to comply.
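The value in effect can be read back out of the file system map; a sketch assuming the usual JSON layout of ``ceph fs get`` and a file system named ``cephfs``::

    import json
    import subprocess

    # The mdsmap section of the file system dump carries session_timeout
    # and session_autoclose after this change.
    out = subprocess.check_output(
        ["ceph", "fs", "get", "cephfs", "--format=json"])
    print(json.loads(out.decode("utf-8"))["mdsmap"]["session_timeout"])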
Message: "Client *name* failing to respond to cache pressure"
Code: MDS_HEALTH_CLIENT_RECALL, MDS_HEALTH_CLIENT_RECALL_MANY
:Type: Boolean
:Default: ``true``
-
-``mds max file size``
-
-:Description: The maximum allowed file size to set when creating a
- new file system.
-
-:Type: 64-bit Integer Unsigned
-:Default: ``1ULL << 40``
-
``mds cache memory limit``
:Description: The memory limit the MDS should enforce for its cache.
:Default: ``24.0*60.0``
-``mds session timeout``
-
-:Description: The interval (in seconds) of client inactivity before Ceph
- times out capabilities and leases.
-
-:Type: Float
-:Default: ``60``
-
-
-``mds session autoclose``
-
-:Description: The interval (in seconds) before Ceph closes
- a laggy client's session.
-
-:Type: Float
-:Default: ``300``
-
-
``mds reconnect timeout``
:Description: The interval (in seconds) to wait for clients to reconnect
:Default: ``0``
-``mds bal frag``
-
-:Description: Determines whether the MDS will fragment directories.
-:Type: Boolean
-:Default: ``false``
-
-
``mds bal split size``
:Description: The maximum directory size before the MDS will split a directory
raise RuntimeError("cannot deactivate rank 0")
self.mon_manager.raw_cluster_cmd("mds", "deactivate", "%d:%d" % (self.id, rank))
+ def set_var(self, var, *args):
+ a = map(str, args)
+ self.mon_manager.raw_cluster_cmd("fs", "set", self.name, var, *a)
+
def set_max_mds(self, max_mds):
- self.mon_manager.raw_cluster_cmd("fs", "set", self.name, "max_mds", "%d" % max_mds)
+ self.set_var("max_mds", "%d" % max_mds)
def set_allow_dirfrags(self, yes):
- self.mon_manager.raw_cluster_cmd("fs", "set", self.name, "allow_dirfrags", str(yes).lower(), '--yes-i-really-mean-it')
+ self.set_var("allow_dirfrags", str(yes).lower(), '--yes-i-really-mean-it')
def set_allow_new_snaps(self, yes):
- self.mon_manager.raw_cluster_cmd("fs", "set", self.name, "allow_new_snaps", str(yes).lower(), '--yes-i-really-mean-it')
+ self.set_var("allow_new_snaps", str(yes).lower(), '--yes-i-really-mean-it')
def get_pgs_per_fs_pool(self):
"""
def get_mds_map(self):
return self.status().get_fsmap(self.id)['mdsmap']
+ def get_var(self, var):
+ return self.status().get_fsmap(self.id)['mdsmap'][var]
+
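Taken together, tests can tune a per-file-system attribute and read it back from the mdsmap through these wrappers; a minimal sketch inside a hypothetical ``CephFSTestCase`` subclass (the 90s value is arbitrary)::

    from tasks.cephfs.cephfs_test_case import CephFSTestCase

    class TestSessionVars(CephFSTestCase):
        def test_timeout_roundtrip(self):
            # set_var()/get_var() are the helpers added above; 90s is an
            # arbitrary value above the 30s minimum enforced by the mons.
            self.fs.set_var("session_timeout", "90")
            self.assertEqual(self.fs.get_var("session_timeout"), 90)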
def add_data_pool(self, name):
self.mon_manager.raw_cluster_cmd('osd', 'pool', 'create', name, self.get_pgs_per_fs_pool().__str__())
self.mon_manager.raw_cluster_cmd('fs', 'add_data_pool', self.name, name)
# Client B tries to stat the file that client A created
rproc = self.mount_b.write_background("file1")
- # After mds_session_timeout, we should see a health warning (extra lag from
+ # After session_timeout, we should see a health warning (extra lag from
# MDS beacon period)
- mds_session_timeout = float(self.fs.get_config("mds_session_timeout"))
- self.wait_for_health("MDS_CLIENT_LATE_RELEASE", mds_session_timeout + 10)
+ session_timeout = self.fs.get_var("session_timeout")
+ self.wait_for_health("MDS_CLIENT_LATE_RELEASE", session_timeout + 10)
# Client B should still be stuck
self.assertFalse(rproc.finished)
REQUIRE_ONE_CLIENT_REMOTE = True
CLIENTS_REQUIRED = 2
- LOAD_SETTINGS = ["mds_session_timeout", "mds_reconnect_timeout", "ms_max_backoff"]
+ LOAD_SETTINGS = ["mds_reconnect_timeout", "ms_max_backoff"]
# Environment references
- mds_session_timeout = None
mds_reconnect_timeout = None
ms_max_backoff = None
I/O after failure.
"""
+ session_timeout = self.fs.get_var("session_timeout")
+
# We only need one client
self.mount_b.umount_wait()
# ...then it should block
self.assertFalse(write_blocked.finished)
self.assert_session_state(client_id, "open")
- time.sleep(self.mds_session_timeout * 1.5) # Long enough for MDS to consider session stale
+ time.sleep(session_timeout * 1.5) # Long enough for MDS to consider session stale
self.assertFalse(write_blocked.finished)
self.assert_session_state(client_id, "stale")
REQUIRE_KCLIENT_REMOTE = True
CLIENTS_REQUIRED = 2
- LOAD_SETTINGS = ["mds_session_timeout", "mds_reconnect_timeout", "ms_max_backoff"]
+ LOAD_SETTINGS = ["mds_reconnect_timeout", "ms_max_backoff"]
# Environment references
- mds_session_timeout = None
mds_reconnect_timeout = None
ms_max_backoff = None
self.mount_a.create_destroy()
def test_stale_caps(self):
+ session_timeout = self.fs.get_var("session_timeout")
+
# Capability release from stale session
# =====================================
cap_holder = self.mount_a.open_background()
self.mount_a.kill()
try:
- # Now, after mds_session_timeout seconds, the waiter should
+ # Now, after session_timeout seconds, the waiter should
# complete their operation when the MDS marks the holder's
# session stale.
cap_waiter = self.mount_b.write_background()
cap_waited = b - a
log.info("cap_waiter waited {0}s".format(cap_waited))
- self.assertTrue(self.mds_session_timeout / 2.0 <= cap_waited <= self.mds_session_timeout * 2.0,
+ self.assertTrue(session_timeout / 2.0 <= cap_waited <= session_timeout * 2.0,
"Capability handover took {0}, expected approx {1}".format(
- cap_waited, self.mds_session_timeout
+ cap_waited, session_timeout
))
cap_holder.stdin.close()
# Eviction while holding a capability
# ===================================
+ session_timeout = self.fs.get_var("session_timeout")
+
# Take out a write capability on a file on client A,
# and then immediately kill it.
cap_holder = self.mount_a.open_background()
log.info("cap_waiter waited {0}s".format(cap_waited))
# This is the check that it happened 'now' rather than waiting
# for the session timeout
- self.assertLess(cap_waited, self.mds_session_timeout / 2.0,
+ self.assertLess(cap_waited, session_timeout / 2.0,
"Capability handover took {0}, expected less than {1}".format(
- cap_waited, self.mds_session_timeout / 2.0
+ cap_waited, session_timeout / 2.0
))
cap_holder.stdin.close()
Apply kwargs as MDS configuration settings, enable dirfrags
and restart the MDSs.
"""
- kwargs['mds_bal_frag'] = "true"
for k, v in kwargs.items():
self.ceph_cluster.set_ceph_conf("mds", k, v.__str__())
class TestMisc(CephFSTestCase):
CLIENTS_REQUIRED = 2
- LOAD_SETTINGS = ["mds_session_autoclose"]
- mds_session_autoclose = None
-
def test_getattr_caps(self):
"""
Check if MDS recognizes the 'mask' parameter of open request.
only session
"""
+ session_autoclose = self.fs.get_var("session_autoclose")
+
self.mount_b.umount_wait()
ls_data = self.fs.mds_asok(['session', 'ls'])
self.assert_session_count(1, ls_data)
self.mount_a.kill()
self.mount_a.kill_cleanup()
- time.sleep(self.mds_session_autoclose * 1.5)
+ time.sleep(session_autoclose * 1.5)
ls_data = self.fs.mds_asok(['session', 'ls'])
self.assert_session_count(1, ls_data)
self.mount_a.kill()
self.mount_a.kill_cleanup()
- time.sleep(self.mds_session_autoclose * 1.5)
+ time.sleep(session_autoclose * 1.5)
ls_data = self.fs.mds_asok(['session', 'ls'])
self.assert_session_count(1, ls_data)
max_purge_files = 2
- self.set_conf('mds', 'mds_bal_frag', 'false')
+ self.fs.set_allow_dirfrags(True)
self.set_conf('mds', 'mds_max_purge_files', "%d" % max_purge_files)
self.fs.mds_fail_restart()
self.fs.wait_for_daemons()
OPTION(filer_max_truncate_ops, OPT_U32)
OPTION(mds_data, OPT_STR)
-OPTION(mds_max_file_size, OPT_U64) // Used when creating new CephFS. Change with 'ceph fs set <fs_name> max_file_size <size>' afterwards
// max xattr kv pairs size for each dir/file
OPTION(mds_max_xattr_pairs_size, OPT_U32)
OPTION(mds_max_file_recover, OPT_U32)
OPTION(mds_beacon_grace, OPT_FLOAT)
OPTION(mds_enforce_unique_name, OPT_BOOL)
-OPTION(mds_session_timeout, OPT_FLOAT) // cap bits and leases time out if client unresponsive or not returning its caps
OPTION(mds_session_blacklist_on_timeout, OPT_BOOL) // whether to blacklist clients whose sessions are dropped due to timeout
OPTION(mds_session_blacklist_on_evict, OPT_BOOL) // whether to blacklist clients whose sessions are dropped via admin commands
OPTION(mds_sessionmap_keys_per_op, OPT_U32) // how many sessions should I try to load/store in a single OMAP operation?
OPTION(mds_recall_state_timeout, OPT_FLOAT) // detect clients which aren't trimming caps
OPTION(mds_freeze_tree_timeout, OPT_FLOAT) // detecting freeze tree deadlock
-OPTION(mds_session_autoclose, OPT_FLOAT) // autoclose idle session
OPTION(mds_health_summarize_threshold, OPT_INT) // collapse N-client health metrics to a single 'many'
OPTION(mds_reconnect_timeout, OPT_FLOAT) // seconds to wait for clients during mds restart
- // make it (mds_session_timeout - mds_beacon_grace)
+ // make it (mdsmap.session_timeout - mds_beacon_grace)
OPTION(mds_tick_interval, OPT_FLOAT)
OPTION(mds_dirstat_min_interval, OPT_FLOAT) // try to avoid propagating more often than this
OPTION(mds_scatter_nudge_interval, OPT_FLOAT) // how quickly dirstat changes propagate up the hierarchy
OPTION(mds_bal_sample_interval, OPT_DOUBLE) // every 3 seconds
OPTION(mds_bal_replicate_threshold, OPT_FLOAT)
OPTION(mds_bal_unreplicate_threshold, OPT_FLOAT)
-OPTION(mds_bal_frag, OPT_BOOL)
OPTION(mds_bal_split_size, OPT_INT)
OPTION(mds_bal_split_rd, OPT_FLOAT)
OPTION(mds_bal_split_wr, OPT_FLOAT)
.set_default("/var/lib/ceph/mds/$cluster-$id")
.set_description(""),
- Option("mds_max_file_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
- .set_default(1ULL << 40)
- .set_description(""),
-
Option("mds_max_xattr_pairs_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
.set_default(64_K)
.set_description(""),
.set_default(true)
.set_description(""),
- Option("mds_session_timeout", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
- .set_default(60)
- .set_description(""),
-
Option("mds_session_blacklist_on_timeout", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
.set_default(true)
.set_description(""),
.set_default(30)
.set_description(""),
- Option("mds_session_autoclose", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
- .set_default(300)
- .set_description(""),
-
Option("mds_health_summarize_threshold", Option::TYPE_INT, Option::LEVEL_ADVANCED)
.set_default(10)
.set_description(""),
.set_default(0)
.set_description(""),
- Option("mds_bal_frag", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
- .set_default(true)
- .set_description(""),
-
Option("mds_bal_split_size", Option::TYPE_INT, Option::LEVEL_ADVANCED)
.set_default(10000)
.set_description(""),
{
auto fs = std::make_shared<Filesystem>();
fs->mds_map.fs_name = name;
- fs->mds_map.max_mds = 1;
fs->mds_map.data_pools.push_back(data_pool);
fs->mds_map.metadata_pool = metadata_pool;
fs->mds_map.cas_pool = -1;
- fs->mds_map.max_file_size = g_conf->mds_max_file_size;
fs->mds_map.compat = compat;
fs->mds_map.created = ceph_clock_now();
fs->mds_map.modified = ceph_clock_now();
- fs->mds_map.session_timeout = g_conf->mds_session_timeout;
- fs->mds_map.session_autoclose = g_conf->mds_session_autoclose;
fs->mds_map.enabled = true;
if (features & CEPH_FEATURE_SERVER_JEWEL) {
fs->fscid = next_filesystem_id++;
// Carry forward what makes sense
new_fs->fscid = fs->fscid;
new_fs->mds_map.inline_data_enabled = fs->mds_map.inline_data_enabled;
- new_fs->mds_map.max_mds = 1;
new_fs->mds_map.data_pools = fs->mds_map.data_pools;
new_fs->mds_map.metadata_pool = fs->mds_map.metadata_pool;
new_fs->mds_map.cas_pool = fs->mds_map.cas_pool;
new_fs->mds_map.fs_name = fs->mds_map.fs_name;
- new_fs->mds_map.max_file_size = g_conf->mds_max_file_size;
new_fs->mds_map.compat = compat;
new_fs->mds_map.created = ceph_clock_now();
new_fs->mds_map.modified = ceph_clock_now();
- new_fs->mds_map.session_timeout = g_conf->mds_session_timeout;
- new_fs->mds_map.session_autoclose = g_conf->mds_session_autoclose;
new_fs->mds_map.standby_count_wanted = fs->mds_map.standby_count_wanted;
new_fs->mds_map.enabled = true;
/**
* Return true if any currently revoking caps exceed the
- * mds_session_timeout threshold.
+ * session_timeout threshold.
*/
bool Locker::any_late_revoking_caps(xlist<Capability*> const &revoking) const
{
} else {
utime_t now = ceph_clock_now();
utime_t age = now - (*p)->get_last_revoke_stamp();
- if (age <= g_conf->mds_session_timeout) {
+ if (age <= mds->mdsmap->get_session_timeout()) {
return false;
} else {
return true;
utime_t age = now - cap->get_last_revoke_stamp();
dout(20) << __func__ << " age = " << age << cap->get_client() << "." << cap->get_inode()->ino() << dendl;
- if (age <= g_conf->mds_session_timeout) {
- dout(20) << __func__ << " age below timeout " << g_conf->mds_session_timeout << dendl;
+ if (age <= mds->mdsmap->get_session_timeout()) {
+ dout(20) << __func__ << " age below timeout " << mds->mdsmap->get_session_timeout() << dendl;
break;
} else {
++n;
}
}
// exponential backoff of warning intervals
- if (age > g_conf->mds_session_timeout * (1 << cap->get_num_revoke_warnings())) {
+ if (age > mds->mdsmap->get_session_timeout() * (1 << cap->get_num_revoke_warnings())) {
cap->inc_num_revoke_warnings();
stringstream ss;
ss << "client." << cap->get_client() << " isn't responding to mclientcaps(revoke), ino "
void MDBalancer::maybe_fragment(CDir *dir, bool hot)
{
// split/merge
- if (g_conf->mds_bal_frag && g_conf->mds_bal_fragment_interval > 0 &&
+ if (g_conf->mds_bal_fragment_interval > 0 &&
!dir->inode->is_base() && // not root/base (for now at least)
dir->is_auth()) {
flags(CEPH_MDSMAP_DEFAULTS), last_failure(0),
last_failure_osd_epoch(0),
tableserver(0), root(0),
- session_timeout(0),
- session_autoclose(0),
- max_file_size(0),
+ session_timeout(60),
+ session_autoclose(300),
+ max_file_size(1ULL<<40), /* 1TB */
cas_pool(-1),
metadata_pool(-1),
- max_mds(0),
+ max_mds(1),
standby_count_wanted(-1),
ever_allowed_features(0),
explicitly_allowed_features(0),
utime_t get_session_timeout() const {
return utime_t(session_timeout,0);
}
+ void set_session_timeout(uint32_t t) {
+ session_timeout = t;
+ }
utime_t get_session_autoclose() const {
return utime_t(session_autoclose, 0);
}
+ void set_session_autoclose(uint32_t t) {
+ session_autoclose = t;
+ }
uint64_t get_max_filesize() const { return max_file_size; }
void set_max_filesize(uint64_t m) { max_file_size = m; }
// (caps go stale, lease die)
utime_t now = ceph_clock_now();
utime_t cutoff = now;
- cutoff -= g_conf->mds_session_timeout;
+ cutoff -= mds->mdsmap->get_session_timeout();
while (1) {
Session *session = mds->sessionmap.get_oldest_session(Session::STATE_OPEN);
if (!session) break;
// autoclose
cutoff = now;
- cutoff -= g_conf->mds_session_autoclose;
+ cutoff -= mds->mdsmap->get_session_autoclose();
// don't kick clients if we've been laggy
if (mds->get_laggy_until() > cutoff) {
{
fs->mds_map.set_standby_count_wanted(n);
});
+ } else if (var == "session_timeout") {
+ if (interr.length()) {
+ ss << var << " requires an integer value";
+ return -EINVAL;
+ }
+ if (n < 30) {
+ ss << var << " must be at least 30s";
+ return -ERANGE;
+ }
+ fsmap.modify_filesystem(
+ fs->fscid,
+ [n](std::shared_ptr<Filesystem> fs)
+ {
+ fs->mds_map.set_session_timeout((uint32_t)n);
+ });
+ } else if (var == "session_autoclose") {
+ if (interr.length()) {
+ ss << var << " requires an integer value";
+ return -EINVAL;
+ }
+ if (n < 30) {
+ ss << var << " must be at least 30s";
+ return -ERANGE;
+ }
+ fsmap.modify_filesystem(
+ fs->fscid,
+ [n](std::shared_ptr<Filesystem> fs)
+ {
+ fs->mds_map.set_session_autoclose((uint32_t)n);
+ });
} else {
ss << "unknown variable " << var;
return -EINVAL;
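The 30-second floor enforced above means an out-of-range request fails at the mon; a sketch of the expected rejection (``cephfs`` is a placeholder name)::

    import subprocess

    try:
        # 5s is below the enforced minimum of 30s, so the mons return ERANGE
        # and the CLI exits non-zero.
        subprocess.check_call(
            ["ceph", "fs", "set", "cephfs", "session_timeout", "5"])
    except subprocess.CalledProcessError as e:
        print("rejected:", e.returncode)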
"name=fs_name,type=CephString " \
"name=var,type=CephChoices,strings=max_mds|max_file_size"
"|allow_new_snaps|inline_data|cluster_down|allow_multimds|allow_dirfrags|balancer" \
- "|standby_count_wanted " \
+ "|standby_count_wanted|session_timeout|session_autoclose " \
"name=val,type=CephString " \
"name=confirm,type=CephString,req=false", \
- "set mds parameter <var> to <val>", "mds", "rw", "cli,rest")
+ "set fs parameter <var> to <val>", "mds", "rw", "cli,rest")
COMMAND("fs flag set name=flag_name,type=CephChoices,strings=enable_multiple "
"name=val,type=CephString " \
"name=confirm,type=CephChoices,strings=--yes-i-really-mean-it,req=false", \