The false warning happens in the following sequence of events:
- MDS has cache pressure, sends recall state messages to clients
- Client does not trim as many caps as the MDS expected, so the MDS
does not reset session->recalled_at
- MDS no longer has cache pressure, so it stops sending recall state
messages to clients.
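- Client does not release its caps, so session->recalled_at in the
MDS remains unchanged and eventually falls behind the recall timeout,
which raises the warning even though no recall is in progress.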
Signed-off-by: Yan, Zheng <zyan@redhat.com>
(cherry picked from commit
51c926a74e5ef478c11ccbcf11c351aa520dde2a)
{
set<Session*> sessions;
mds->sessionmap.get_client_session_set(sessions);
+
utime_t cutoff = ceph_clock_now(g_ceph_context);
cutoff -= g_conf->mds_recall_state_timeout;
+ utime_t last_recall = mds->mdcache->last_recall_state;
std::list<MDSHealthMetric> late_recall_metrics;
std::list<MDSHealthMetric> large_completed_requests_metrics;
dout(20) << "Session servicing RECALL " << session->info.inst
<< ": " << session->recalled_at << " " << session->recall_release_count
<< "/" << session->recall_count << dendl;
- if (session->recalled_at < cutoff) {
+ if (last_recall < cutoff || session->last_recall_sent < last_recall) {
+ dout(20) << " no longer recall" << dendl;
+ session->clear_recalled_at();
+ } else if (session->recalled_at < cutoff) {
dout(20) << " exceeded timeout " << session->recalled_at << " vs. " << cutoff << dendl;
std::ostringstream oss;
oss << "Client " << session->get_human_name() << " failing to respond to cache pressure";
if (num_inodes_with_caps > g_conf->mds_cache_size) {
float ratio = (float)g_conf->mds_cache_size * .9 / (float)num_inodes_with_caps;
- if (ratio < 1.0)
+ if (ratio < 1.0) {
+ last_recall_state = ceph_clock_now(g_ceph_context);
mds->server->recall_client_state(ratio);
+ }
}
-
}
void trim_client_leases();
void check_memory_usage();
+ utime_t last_recall_state;
+
// shutdown
void shutdown_start();
void shutdown_check();
{
if (!recalled_at.is_zero()) {
recall_release_count += n_caps;
- if (recall_release_count >= recall_count) {
- recalled_at = utime_t();
- recall_count = 0;
- recall_release_count = 0;
- }
+ if (recall_release_count >= recall_count)
+ clear_recalled_at();
}
}
if (recalled_at.is_zero()) {
// Entering recall phase, set up counters so we can later
// judge whether the client has respected the recall request
- recalled_at = ceph_clock_now(g_ceph_context);
+ recalled_at = last_recall_sent = ceph_clock_now(g_ceph_context);
assert (new_limit < caps.size()); // Behaviour of Server::recall_client_state
recall_count = caps.size() - new_limit;
recall_release_count = 0;
+ } else {
+ last_recall_sent = ceph_clock_now(g_ceph_context);
}
}
+void Session::clear_recalled_at() // Reset all ephemeral recall-tracking state of this session.
+{
+ recalled_at = last_recall_sent = utime_t(); // default-constructed timestamps; recalled_at.is_zero() is what the recall/release paths test
+ recall_count = 0; // number of caps requested in the recall
+ recall_release_count = 0; // number of caps released so far
+}
+
void Session::set_client_metadata(map<string, string> const &meta)
{
info.client_metadata = meta;
// Ephemeral state for tracking progress of capability recalls
utime_t recalled_at; // When was I asked to SESSION_RECALL?
+ utime_t last_recall_sent;
uint32_t recall_count; // How many caps was I asked to SESSION_RECALL?
uint32_t recall_release_count; // How many caps have I actually revoked?
void notify_cap_release(size_t n_caps);
void notify_recall_sent(int const new_limit);
+ void clear_recalled_at();
inodeno_t next_ino() {
if (info.prealloc_inos.empty())
Session() :
state(STATE_CLOSED), state_seq(0), importing_count(0),
- recalled_at(), recall_count(0), recall_release_count(0),
+ recall_count(0), recall_release_count(0),
auth_caps(g_ceph_context),
connection(NULL), item_session_list(this),
requests(0), // member_offset passed to front() manually