.set_default(60.0)
.set_description("decay rate for warning on slow session cap recall"),
+ Option("mds_session_cache_liveness_decay_rate", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+ .add_see_also("mds_session_cache_liveness_magnitude")
+ .set_default(5_min) // NOTE(review): if this value is the counter's half-life, one hour is 12 halvings (10 take 50 minutes) -- confirm the figures quoted in the long description
+ .set_description("decay rate for session liveness leading to preemptive cap recall")
+ .set_long_description("This determines how long a session needs to be quiescent before the MDS begins preemptively recalling capabilities. The default of 5 minutes will cause 10 halvings of the decay counter after 1 hour, or 1/1024. The default magnitude of 10 (2^10 or 1024) is chosen so that the MDS considers a previously chatty session (approximately) to be quiescent after 1 hour."),
+
+ Option("mds_session_cache_liveness_magnitude", Option::TYPE_SIZE, Option::LEVEL_ADVANCED)
+ .add_see_also("mds_session_cache_liveness_decay_rate")
+ .set_default(10) // used as a right-shift count: with liveness enforcement on, a session is picked when liveness < (num_caps >> magnitude)
+ .set_description("decay magnitude for preemptively recalling caps on quiet client")
+ .set_long_description("This is the order of magnitude difference (in base 2) of the internal liveness decay counter and the number of capabilities the session holds. When this difference occurs, the MDS treats the session as quiescent and begins recalling capabilities."),
+
Option("mds_freeze_tree_timeout", Option::TYPE_FLOAT, Option::LEVEL_DEV)
.set_default(30)
.set_description(""),
trim_client_leases();
trim();
check_memory_usage();
- mds->server->recall_client_state(nullptr, Server::RecallFlags::ENFORCE_MAX);
+ auto flags = Server::RecallFlags::ENFORCE_MAX|Server::RecallFlags::ENFORCE_LIVENESS;
+ mds->server->recall_client_state(nullptr, flags);
upkeep_last_trim = clock::now();
} else {
dout(10) << "cache not ready for trimming" << dendl;
"mds_recall_max_decay_rate",
"mds_recall_warning_decay_rate",
"mds_request_load_average_decay_rate",
+ "mds_session_cache_liveness_decay_rate",
NULL
};
return KEYS;
const auto now = clock::now();
const bool steady = !!(flags&RecallFlags::STEADY);
const bool enforce_max = !!(flags&RecallFlags::ENFORCE_MAX);
+ const bool enforce_liveness = !!(flags&RecallFlags::ENFORCE_LIVENESS);
const bool trim = !!(flags&RecallFlags::TRIM);
const auto max_caps_per_client = g_conf().get_val<uint64_t>("mds_max_caps_per_client");
const auto recall_global_max_decay_threshold = g_conf().get_val<Option::size_t>("mds_recall_global_max_decay_threshold");
const auto recall_max_caps = g_conf().get_val<Option::size_t>("mds_recall_max_caps");
const auto recall_max_decay_threshold = g_conf().get_val<Option::size_t>("mds_recall_max_decay_threshold");
+ const auto cache_liveness_magnitude = g_conf().get_val<Option::size_t>("mds_session_cache_liveness_magnitude");
dout(7) << __func__ << ":"
<< " min=" << min_caps_per_client
/* trim caps of sessions with the most caps first */
std::multimap<uint64_t, Session*> caps_session;
- auto f = [&caps_session, enforce_max, trim, max_caps_per_client](auto& s) {
+ auto f = [&caps_session, enforce_max, enforce_liveness, trim, max_caps_per_client, cache_liveness_magnitude](auto& s) {
auto num_caps = s->caps.size();
- if (trim || (enforce_max && num_caps > max_caps_per_client)) {
+ auto cache_liveness = s->get_session_cache_liveness(); // current reading of the session's liveness decay counter
+ if (trim || (enforce_max && num_caps > max_caps_per_client) || (enforce_liveness && cache_liveness < (num_caps>>cache_liveness_magnitude))) { // third clause: liveness has dropped below num_caps / 2^magnitude; NOTE(review): the shift is undefined if the configured magnitude is >= the bit width of num_caps -- confirm the option is bounded
caps_session.emplace(std::piecewise_construct, std::forward_as_tuple(num_caps), std::forward_as_tuple(s));
}
};
STEADY = (1<<0),
ENFORCE_MAX = (1<<1),
TRIM = (1<<2),
+ ENFORCE_LIVENESS = (1<<3),
};
std::pair<bool, uint64_t> recall_client_state(MDSGatherBuilder* gather, RecallFlags=RecallFlags::NONE);
void force_clients_readonly();
f->dump_object("release_caps", release_caps);
f->dump_object("recall_caps_throttle", recall_caps_throttle);
f->dump_object("recall_caps_throttle2o", recall_caps_throttle2o);
+ f->dump_object("session_cache_liveness", session_cache_liveness);
info.dump(f);
}
};
apply_to_open_sessions(mut);
}
+ if (changed.count("mds_session_cache_liveness_decay_rate")) { // the liveness decay rate was changed at runtime
+ auto d = g_conf().get_val<double>("mds_session_cache_liveness_decay_rate");
+ auto mut = [d](auto s) {
+ s->session_cache_liveness = DecayCounter(d); // rebuild the counter with the new rate; the previous reading is discarded
+ s->session_cache_liveness.hit(s->caps.size()); /* re-seed with the current cap count so the MDS doesn't immediately start trimming the session */
+ };
+ apply_to_open_sessions(mut);
+ }
}
void SessionMap::update_average_session_age() {
// New limit in SESSION_RECALL
uint32_t recall_limit = 0;
+ // session caps liveness: hit once per cap/lease touch, read back through get_session_cache_liveness()
+ DecayCounter session_cache_liveness;
+
// session start time -- used to track average session time
// note that this is initialized in the constructor rather
// than at the time of adding a session to the sessionmap
auto get_release_caps() const {
return release_caps.get();
}
+ // Current reading of the session_cache_liveness decay counter.
+ auto get_session_cache_liveness() const {
+ const auto liveness = session_cache_liveness.get();
+ return liveness;
+ }
inodeno_t next_ino() const {
if (info.prealloc_inos.empty())
}
void touch_cap(Capability *cap) {
+ session_cache_liveness.hit(1.0); // each cap touch adds one unit to the session's liveness counter
caps.push_front(&cap->item_session_caps);
}
void touch_cap_bottom(Capability *cap) {
+ session_cache_liveness.hit(1.0); // counted as activity even though the cap goes to the back of the list
caps.push_back(&cap->item_session_caps);
}
void touch_lease(ClientLease *r) {
+ session_cache_liveness.hit(1.0); // lease touches feed the same liveness counter as cap touches
leases.push_back(&r->item_session_lease);
}
release_caps(g_conf().get_val<double>("mds_recall_warning_decay_rate")),
recall_caps_throttle(g_conf().get_val<double>("mds_recall_max_decay_rate")),
recall_caps_throttle2o(0.5),
+ session_cache_liveness(g_conf().get_val<double>("mds_session_cache_liveness_decay_rate")),
birth_time(clock::now()),
auth_caps(g_ceph_context),
item_session_list(this),