health.metrics.push_back(m);
}
+ // Detect clients failing to respond to modifications to capabilities in
+ // CLIENT_CAPS messages.
+ std::list<const Capability*> late_caps;
+ mds->locker->get_late_cap_releases(&late_caps);
+ std::set<client_t> late_clients;
+ for (std::list<const Capability*>::iterator i =late_caps.begin(); i != late_caps.end(); ++i) {
+ const Capability *cap = *i;
+ late_clients.insert(cap->get_client());
+ }
+
+ for (std::set<client_t>::iterator i = late_clients.begin(); i != late_clients.end(); ++i) {
+ std::ostringstream oss;
+ oss << "client." << *i << " failing to respond to capability release";
+ MDSHealthMetric m(MDS_HEALTH_CLIENT_LATE_RELEASE, HEALTH_WARN, oss.str());
+ m.metadata["client_id"] = stringify(i->v);
+ health.metrics.push_back(m);
+ }
+
// Detect clients failing to generate cap releases from SESSION_RECALL messages
// May be due to buggy client or resource-hogging application.
set<Session*> sessions;
void clear_new() { state &= ~STATE_NEW; }
CInode *get_inode() { return inode; }
- client_t get_client() { return client; }
+ client_t get_client() const { return client; } // const: the late-release health check calls this through a const Capability*
// caps this client wants to hold
int wanted() { return _wanted; }
{
utime_t now = ceph_clock_now(g_ceph_context);
+ dout(20) << __func__ << " " << revoking_caps.size() << " revoking caps" << dendl;
+
for (xlist<Capability*>::iterator p = revoking_caps.begin(); !p.end(); ++p) {
Capability *cap = *p;
+
utime_t age = now - cap->get_last_revoke_stamp();
- if (age <= g_conf->mds_revoke_cap_timeout)
+ dout(20) << __func__ << " age = " << age << cap->get_client() << "." << cap->get_inode()->ino() << dendl;
+ if (age <= g_conf->mds_revoke_cap_timeout) {
+ dout(20) << __func__ << " age below timeout " << g_conf->mds_revoke_cap_timeout << dendl;
break;
+ }
// exponential backoff of warning intervals
if (age > g_conf->mds_revoke_cap_timeout * (1 << cap->get_num_revoke_warnings())) {
cap->inc_num_revoke_warnings();
stringstream ss;
- ss << "client." << cap->get_client() << " isn't responding to MClientCaps(revoke), ino "
+ ss << "client." << cap->get_client() << " isn't responding to mclientcaps(revoke), ino "
<< cap->get_inode()->ino() << " pending " << ccap_string(cap->pending())
<< " issued " << ccap_string(cap->issued()) << ", sent " << age << " seconds ago\n";
mds->clog->warn() << ss.str();
+ dout(20) << __func__ << " " << ss.str() << dendl;
+ } else {
+ dout(20) << __func__ << " silencing log message (backoff) for " << cap->get_client() << "." << cap->get_inode()->ino() << dendl;
+ }
+ }
+}
+
+void Locker::get_late_cap_releases(std::list<const Capability*> *late_caps) const
+{
+ assert(late_caps != NULL); // the output list is mandatory; results are appended to it, it is not cleared here
+ // Appends caps from the front of revoking_caps until the first one whose revoke age is within mds_revoke_cap_timeout.
+ utime_t now = ceph_clock_now(g_ceph_context);
+ // NOTE(review): stopping at the first in-timeout cap presumably relies on revoking_caps being ordered oldest-revoke-first -- confirm.
+ for (xlist<Capability*>::const_iterator p = revoking_caps.begin(); !p.end(); ++p) {
+ Capability *cap = *p;
+
+ utime_t age = now - cap->get_last_revoke_stamp(); // time elapsed since this cap's last revoke stamp
+ if (age <= g_conf->mds_revoke_cap_timeout) {
+ break; // this cap is still within the timeout; the remaining caps are not examined
+ } else {
+ late_caps->push_back(cap); // revoke has been outstanding longer than the timeout
}
}
}
class SimpleLock;
class ScatterLock;
class LocalLock;
+
class MDCache;
typedef ceph::shared_ptr<MDRequestImpl> MDRequestRef;
void remove_client_cap(CInode *in, client_t client);
+ void get_late_cap_releases(std::list<const Capability*> *late_caps) const; // appends revoking caps whose revoke age exceeds mds_revoke_cap_timeout (stops at the first that does not); late_caps must be non-NULL
+
protected:
void adjust_cap_wanted(Capability *cap, int wanted, int issue_seq);
void handle_client_caps(class MClientCaps *m);
enum mds_metric_t {
MDS_HEALTH_NULL = 0,
MDS_HEALTH_TRIM = 1,
- MDS_HEALTH_CLIENT_RECALL = 2
+ MDS_HEALTH_CLIENT_RECALL = 2,
+ MDS_HEALTH_CLIENT_LATE_RELEASE = 3 // one metric per client returned via Locker::get_late_cap_releases ("failing to respond to capability release")
};
/**