From 94173a37f5b177b954f3d02352b826ae774a8e4c Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 4 Sep 2014 20:15:18 +0800 Subject: [PATCH] mds: warn clients which aren't revoking caps Add a list to Locker to track revoking caps. Print a warning message if a client does not release caps within the given time. Signed-off-by: Yan, Zheng --- src/common/config_opts.h | 3 ++- src/mds/CInode.cc | 1 + src/mds/Capability.h | 14 ++++++++++++-- src/mds/Locker.cc | 36 ++++++++++++++++++++++++++++++++++-- src/mds/Locker.h | 3 +++ src/mds/MDS.cc | 2 +- 6 files changed, 53 insertions(+), 6 deletions(-) diff --git a/src/common/config_opts.h b/src/common/config_opts.h index 008c89be77d35..b4486699c932f 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -315,7 +315,8 @@ OPTION(mds_beacon_grace, OPT_FLOAT, 15) OPTION(mds_enforce_unique_name, OPT_BOOL, true) OPTION(mds_blacklist_interval, OPT_FLOAT, 24.0*60.0) // how long to blacklist failed nodes OPTION(mds_session_timeout, OPT_FLOAT, 60) // cap bits and leases time out if client idle -OPTION(mds_freeze_tree_timeout, OPT_FLOAT, 30) // cap bits and leases time out if client idle +OPTION(mds_revoke_cap_timeout, OPT_FLOAT, 60) // detect clients which aren't revoking caps +OPTION(mds_freeze_tree_timeout, OPT_FLOAT, 30) // detecting freeze tree deadlock OPTION(mds_session_autoclose, OPT_FLOAT, 300) // autoclose idle session OPTION(mds_reconnect_timeout, OPT_FLOAT, 45) // seconds to wait for clients during mds restart // make it (mds_session_timeout - mds_beacon_grace) diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc index b37f77ce76ba0..8624b66896eef 100644 --- a/src/mds/CInode.cc +++ b/src/mds/CInode.cc @@ -2572,6 +2572,7 @@ void CInode::remove_client_cap(client_t client) Capability *cap = client_caps[client]; cap->item_session_caps.remove_myself(); + cap->item_revoking_caps.remove_myself(); containing_realm->remove_cap(client, cap); if (client == loner_cap) diff --git a/src/mds/Capability.h 
b/src/mds/Capability.h index ab5742bfc39c1..37adab19fd559 100644 --- a/src/mds/Capability.h +++ b/src/mds/Capability.h @@ -115,7 +115,8 @@ private: __u32 _wanted; // what the client wants (ideally) utime_t last_issue_stamp; - + utime_t last_revoke_stamp; + unsigned num_revoke_warnings; // track in-flight caps -------------- // - add new caps to _pending @@ -193,6 +194,9 @@ public: _issued = caps | _pending; } } + + if (_issued == _pending) + item_revoking_caps.remove_myself(); //check_rdcaps_list(); } // we may get a release racing with revocations, which means our revokes will be ignored @@ -226,6 +230,7 @@ public: xlist::item item_session_caps; xlist::item item_snaprealm_caps; + xlist::item item_revoking_caps; Capability(CInode *i = NULL, uint64_t id = 0, client_t c = 0) : inode(i), client(c), @@ -238,7 +243,7 @@ public: suppress(0), state(0), client_follows(0), client_xattr_version(0), client_inline_version(0), - item_session_caps(this), item_snaprealm_caps(this) { + item_session_caps(this), item_snaprealm_caps(this), item_revoking_caps(this) { g_num_cap++; g_num_capa++; } @@ -255,9 +260,14 @@ public: ceph_seq_t get_last_sent() { return last_sent; } utime_t get_last_issue_stamp() { return last_issue_stamp; } + utime_t get_last_revoke_stamp() { return last_revoke_stamp; } void set_last_issue() { last_issue = last_sent; } void set_last_issue_stamp(utime_t t) { last_issue_stamp = t; } + void set_last_revoke_stamp(utime_t t) { last_revoke_stamp = t; } + void reset_num_revoke_warnings() { num_revoke_warnings = 0; } + void inc_num_revoke_warnings() { ++num_revoke_warnings; } + unsigned get_num_revoke_warnings() { return num_revoke_warnings; } void set_cap_id(uint64_t i) { cap_id = i; } uint64_t get_cap_id() { return cap_id; } diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index 667bf55e23899..5efbfa9c36662 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -112,6 +112,11 @@ void Locker::dispatch(Message *m) } } +void Locker::tick() +{ + scatter_tick(); + 
caps_tick(); +} /* * locks vs rejoin @@ -1932,8 +1937,14 @@ bool Locker::issue_caps(CInode *in, Capability *only_cap) << " new pending " << ccap_string(after) << " was " << ccap_string(before) << dendl; - MClientCaps *m = new MClientCaps((before & ~after) ? CEPH_CAP_OP_REVOKE:CEPH_CAP_OP_GRANT, - in->ino(), + int op = (before & ~after) ? CEPH_CAP_OP_REVOKE : CEPH_CAP_OP_GRANT; + if (op == CEPH_CAP_OP_REVOKE) { + revoking_caps.push_back(&cap->item_revoking_caps); + cap->set_last_revoke_stamp(ceph_clock_now(g_ceph_context)); + cap->reset_num_revoke_warnings(); + } + + MClientCaps *m = new MClientCaps(op, in->ino(), in->find_snaprealm()->inode->ino(), cap->get_cap_id(), cap->get_last_seq(), after, wanted, 0, @@ -3192,6 +3203,27 @@ void Locker::remove_client_cap(CInode *in, client_t client) try_eval(in, CEPH_CAP_LOCKS); } +void Locker::caps_tick() +{ + utime_t now = ceph_clock_now(g_ceph_context); + + for (xlist::iterator p = revoking_caps.begin(); !p.end(); ++p) { + Capability *cap = *p; + + utime_t age = now - cap->get_last_revoke_stamp(); + if (age <= g_conf->mds_revoke_cap_timeout) + break; + // exponential backoff of warning intervals + if (age > g_conf->mds_revoke_cap_timeout * (1 << cap->get_num_revoke_warnings())) { + cap->inc_num_revoke_warnings(); + stringstream ss; + ss << "client." 
<< cap->get_client() << " isn't responding to MClientCaps(revoke), ino " + << cap->get_inode()->ino() << " pending " << ccap_string(cap->pending()) + << " issued " << ccap_string(cap->issued()) << ", sent " << age << " seconds ago\n"; + mds->clog->warn() << ss.str(); + } + } +} void Locker::handle_client_lease(MClientLease *m) { diff --git a/src/mds/Locker.h b/src/mds/Locker.h index 7f7a976a017d2..36bd91e42f210 100644 --- a/src/mds/Locker.h +++ b/src/mds/Locker.h @@ -66,6 +66,7 @@ private: void dispatch(Message *m); void handle_lock(MLock *m); + void tick(); void nudge_log(SimpleLock *lock); @@ -203,7 +204,9 @@ public: MClientCaps *ack=0); void handle_client_cap_release(class MClientCapRelease *m); void _do_cap_release(client_t client, inodeno_t ino, uint64_t cap_id, ceph_seq_t mseq, ceph_seq_t seq); + void caps_tick(); + xlist revoking_caps; // local public: diff --git a/src/mds/MDS.cc b/src/mds/MDS.cc index 5923b47d6ef62..1247c5771a82b 100644 --- a/src/mds/MDS.cc +++ b/src/mds/MDS.cc @@ -756,7 +756,7 @@ void MDS::tick() // ... if (is_clientreplay() || is_active() || is_stopping()) { - locker->scatter_tick(); + locker->tick(); server->find_idle_sessions(); } -- 2.39.5