def _session_by_id(self, session_ls):
return dict([(s['id'], s) for s in session_ls])
+ def perf_dump(self, rank=None, status=None):
+ return self.fs.rank_asok(['perf', 'dump'], rank=rank, status=status)
+
def wait_until_evicted(self, client_id, timeout=30):
def is_client_evicted():
ls = self._session_list()
else:
raise RuntimeError("expected no client recall warning")
+ def test_cap_acquisition_throttle_readdir(self):
+ """
+ Mostly readdir acquires caps faster than the mds recalls, so the cap
+ acquisition via readdir is throttled by retrying the readdir after
+ a fraction of second (0.5) by default when throttling condition is met.
+ """
+
+ max_caps_per_client = 500
+ cap_acquisition_throttle = 250
+
+ self.config_set('mds', 'mds_max_caps_per_client', max_caps_per_client)
+ self.config_set('mds', 'mds_session_cap_acquisition_throttle', cap_acquisition_throttle)
+
+ # Create 1500 files split across 6 directories, 250 each.
+ for i in range(1, 7):
+ self.mount_a.create_n_files("dir{0}/file".format(i), cap_acquisition_throttle, sync=True)
+
+ mount_a_client_id = self.mount_a.get_global_id()
+
+ # recursive readdir
+ self.mount_a.run_shell_payload("find | wc")
+
+ # validate cap_acquisition decay counter after readdir to exceed throttle count i.e 250
+ cap_acquisition_value = self.get_session(mount_a_client_id)['cap_acquisition']['value']
+ self.assertGreaterEqual(cap_acquisition_value, cap_acquisition_throttle)
+
+ # validate the throttle condition to be hit atleast once
+ cap_acquisition_throttle_hit_count = self.perf_dump()['mds_server']['cap_acquisition_throttle']
+ self.assertGreaterEqual(cap_acquisition_throttle_hit_count, 1)
+
def test_client_release_bug(self):
"""
When a client has a bug (which we will simulate) preventing it from releasing caps,
.set_flag(Option::FLAG_RUNTIME)
.set_long_description("This is the order of magnitude difference (in base 2) of the internal liveness decay counter and the number of capabilities the session holds. When this difference occurs, the MDS treats the session as quiescent and begins recalling capabilities."),
+ Option("mds_session_cap_acquisition_decay_rate", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+ .set_default(10)
+ .set_description("decay rate for session readdir caps leading to readdir throttle")
+ .set_flag(Option::FLAG_RUNTIME)
+ .set_long_description("The half-life for the session cap acquisition counter of caps acquired by readdir. This is used for throttling readdir requests from clients slow to release caps."),
+
+ Option("mds_session_cap_acquisition_throttle", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
+ .set_default(500000)
+ .set_description("throttle point for cap acquisition decay counter"),
+
+ Option("mds_session_max_caps_throttle_ratio", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+ .set_default(1.1)
+ .set_description("ratio of mds_max_maps_per_client that client must exceed before readdir may be throttled by cap acquisition throttle"),
+
+ Option("mds_cap_acquisition_throttle_retry_request_timeout", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED)
+ .set_default(0.5)
+ .set_description("timeout in seconds after which a client request is retried due to cap acquisition throttling"),
+
Option("mds_freeze_tree_timeout", Option::TYPE_FLOAT, Option::LEVEL_DEV)
.set_default(30)
.set_description(""),
"mds_request_load_average_decay_rate",
"mds_session_cache_liveness_decay_rate",
"mds_heartbeat_grace",
+ "mds_session_cap_acquisition_decay_rate",
+ "mds_max_caps_per_client",
+ "mds_session_cap_acquisition_throttle",
+ "mds_session_max_caps_throttle_ratio",
+ "mds_cap_acquisition_throttle_retry_request_time",
NULL
};
return KEYS;
PerfCountersBuilder::PRIO_INTERESTING);
plb.add_u64_counter(l_mdss_cap_revoke_eviction, "cap_revoke_eviction",
"Cap Revoke Client Eviction", "cre", PerfCountersBuilder::PRIO_INTERESTING);
+ plb.add_u64_counter(l_mdss_cap_acquisition_throttle,
+ "cap_acquisition_throttle", "Cap acquisition throttle counter", "cat",
+ PerfCountersBuilder::PRIO_INTERESTING);
// fop latencies are useful
plb.set_prio_default(PerfCountersBuilder::PRIO_USEFUL);
cap_revoke_eviction_timeout = g_conf().get_val<double>("mds_cap_revoke_eviction_timeout");
max_snaps_per_dir = g_conf().get_val<uint64_t>("mds_max_snaps_per_dir");
delegate_inos_pct = g_conf().get_val<uint64_t>("mds_client_delegate_inos_pct");
+ max_caps_per_client = g_conf().get_val<uint64_t>("mds_max_caps_per_client");
+ cap_acquisition_throttle = g_conf().get_val<uint64_t>("mds_session_cap_acquisition_throttle");
+ max_caps_throttle_ratio = g_conf().get_val<double>("mds_session_max_caps_throttle_ratio");
+ caps_throttle_retry_request_timeout = g_conf().get_val<double>("mds_cap_acquisition_throttle_retry_request_timeout");
supported_features = feature_bitset_t(CEPHFS_FEATURES_MDS_SUPPORTED);
}
if (changed.count("mds_client_delegate_inos_pct")) {
delegate_inos_pct = g_conf().get_val<uint64_t>("mds_client_delegate_inos_pct");
}
+ if (changed.count("mds_max_caps_per_client")) {
+ max_caps_per_client = g_conf().get_val<uint64_t>("mds_max_caps_per_client");
+ }
+ if (changed.count("mds_session_cap_acquisition_throttle")) {
+ cap_acquisition_throttle = g_conf().get_val<uint64_t>("mds_session_cap_acquisition_throttle");
+ }
+ if (changed.count("mds_session_max_caps_throttle_ratio")) {
+ max_caps_throttle_ratio = g_conf().get_val<double>("mds_session_max_caps_throttle_ratio");
+ }
+ if (changed.count("mds_cap_acquisition_throttle_retry_request_timeout")) {
+ caps_throttle_retry_request_timeout = g_conf().get_val<double>("mds_cap_acquisition_throttle_retry_request_timeout");
+ }
}
/*
void Server::handle_client_readdir(MDRequestRef& mdr)
{
const cref_t<MClientRequest> &req = mdr->client_request;
+ Session *session = mds->get_session(req);
client_t client = req->get_source().num();
MutationImpl::LockOpVec lov;
CInode *diri = rdlock_path_pin_ref(mdr, false, true);
return;
}
+ auto num_caps = session->get_num_caps();
+ auto session_cap_acquisition = session->get_cap_acquisition();
+
+ if (num_caps > static_cast<uint64_t>(max_caps_per_client * max_caps_throttle_ratio) && session_cap_acquisition >= cap_acquisition_throttle) {
+ dout(20) << "readdir throttled. max_caps_per_client: " << max_caps_per_client << " num_caps: " << num_caps
+ << " session_cap_acquistion: " << session_cap_acquisition << " cap_acquisition_throttle: " << cap_acquisition_throttle << dendl;
+ if (logger)
+ logger->inc(l_mdss_cap_acquisition_throttle);
+
+ mds->timer.add_event_after(caps_throttle_retry_request_timeout, new C_MDS_RetryRequest(mdcache, mdr));
+ return;
+ }
+
lov.add_rdlock(&diri->filelock);
lov.add_rdlock(&diri->dirfragtreelock);
mdcache->lru.lru_touch(dn);
}
+ session->touch_readdir_cap(numfiles);
+
__u16 flags = 0;
if (end) {
flags = CEPH_READDIR_FRAG_END;
l_mdss_req_symlink_latency,
l_mdss_req_unlink_latency,
l_mdss_cap_revoke_eviction,
+ l_mdss_cap_acquisition_throttle,
l_mdss_last,
};
time last_recall_state;
MetricsHandler *metrics_handler;
+
+ // Cache cap acquisition throttle configs
+ uint64_t max_caps_per_client;
+ uint64_t cap_acquisition_throttle;
+ double max_caps_throttle_ratio;
+ double caps_throttle_retry_request_timeout;
};
static inline constexpr auto operator|(Server::RecallFlags a, Server::RecallFlags b) {
f->dump_object("recall_caps_throttle", recall_caps_throttle);
f->dump_object("recall_caps_throttle2o", recall_caps_throttle2o);
f->dump_object("session_cache_liveness", session_cache_liveness);
+ f->dump_object("cap_acquisition", cap_acquisition);
info.dump(f);
}
};
apply_to_open_sessions(mut);
}
+ if (changed.count("mds_session_cap_acquisition_decay_rate")) {
+ auto d = g_conf().get_val<double>("mds_session_cap_acquisition_decay_rate");
+ auto mut = [d](auto s) {
+ s->cap_acquisition = DecayCounter(d);
+ };
+ apply_to_open_sessions(mut);
+ }
}
void SessionMap::update_average_session_age() {
recall_caps_throttle(g_conf().get_val<double>("mds_recall_max_decay_rate")),
recall_caps_throttle2o(0.5),
session_cache_liveness(g_conf().get_val<double>("mds_session_cache_liveness_decay_rate")),
+ cap_acquisition(g_conf().get_val<double>("mds_session_cap_acquisition_decay_rate")),
birth_time(clock::now())
{
set_connection(std::move(con));
auto get_session_cache_liveness() const {
return session_cache_liveness.get();
}
+ auto get_cap_acquisition() const {
+ return cap_acquisition.get();
+ }
inodeno_t take_ino(inodeno_t ino = 0) {
if (ino) {
}
}
+ void touch_readdir_cap(uint32_t count) {
+ cap_acquisition.hit(count);
+ }
+
void touch_cap(Capability *cap) {
session_cache_liveness.hit(1.0);
caps.push_front(&cap->item_session_caps);
// session caps liveness
DecayCounter session_cache_liveness;
+ // cap acquisition via readdir
+ DecayCounter cap_acquisition;
+
// session start time -- used to track average session time
// note that this is initialized in the constructor rather
// than at the time of adding a session to the sessionmap