- time out caps, wake up waiters on renewal
- link caps with mds session
- validate dn leases
+- fix lease validation to check session ttl
- clean up ll_ interface, now that we have leases!
- clean up client mds session vs mdsmap behavior?
- client caps migration races
- rerun destro trace against latest, with various journal lengths
-- bind lease, cap timeouts to session renew, so that they can be _longer_ than the session renew interval
- lease length heuristics
- mds lock last_change stamp?
case CEPH_SESSION_RENEWCAPS:
mds_sessions[from].cap_ttl =
- mds_sessions[from].last_cap_renew_request + mdsmap->get_cap_timeout();
+ mds_sessions[from].last_cap_renew_request + mdsmap->get_session_timeout();
break;
case CEPH_SESSION_STALE:
utime_t now = g_clock.now();
utime_t el = now - last_cap_renew;
- if (mdsmap && el > mdsmap->get_cap_timeout() / 3.0)
+ if (mdsmap && el > mdsmap->get_session_timeout() / 3.0)
renew_caps();
}
mds_beacon_interval: 4, //30.0,
mds_beacon_grace: 15, //60*60.0,
- mds_cap_timeout: 100, // cap bits time out if client idle
+ mds_session_timeout: 60, // cap bits and leases time out if client idle
mds_session_autoclose: 300, // autoclose idle session
- mds_client_lease: 100,
+ mds_client_lease: 120, // (assuming session stays alive)
mds_reconnect_timeout: 30, // seconds to wait for clients during mds restart
mds_tick_interval: 5,
float mds_beacon_interval;
float mds_beacon_grace;
- float mds_cap_timeout;
+ float mds_session_timeout;
float mds_session_autoclose;
float mds_client_lease;
float mds_reconnect_timeout;
struct ceph_inode_info *ci = ceph_inode(inode);
u64 blocks = (size + (1<<9) - 1) >> 9;
int warn = 0;
-
+
if (issued & CEPH_CAP_EXCL) {
if (timespec_compare(ctime, &inode->i_ctime) > 0)
inode->i_ctime = *ctime;
case S_IFDIR:
inode->i_op = &ceph_dir_iops;
inode->i_fop = &ceph_dir_fops;
-
+
ci->i_files = le64_to_cpu(info->files);
ci->i_subdirs = le64_to_cpu(info->subdirs);
ci->i_rbytes = le64_to_cpu(info->rbytes);
* be careful: we can't remove lease from a different session
* without holding that other session's s_mutex. so don't.
*/
- if ((ci->i_lease_ttl == 0 || !time_before(ttl, ci->i_lease_ttl)) &&
+ if ((ci->i_lease_ttl == 0 || !time_before(ttl, ci->i_lease_ttl) ||
+ ci->i_lease_gen != session->s_cap_gen) &&
(!ci->i_lease_session || ci->i_lease_session == session)) {
ci->i_lease_ttl = ttl;
+ ci->i_lease_gen = session->s_cap_gen;
ci->i_lease_mask = mask;
if (!ci->i_lease_session) {
ci->i_lease_session = session;
{
struct ceph_inode_info *ci = ceph_inode(inode);
int havemask;
- int valid;
+ int valid = 0;
int ret = 0;
spin_lock(&inode->i_lock);
havemask = ci->i_lease_mask;
- /* EXCL cap counts for an ICONTENT lease */
- if (__ceph_caps_issued(ci) & CEPH_CAP_EXCL) {
+
+ /* EXCL cap counts for an ICONTENT lease... check caps? */
+ if ((mask & CEPH_LOCK_ICONTENT) &&
+ __ceph_caps_issued(ci) & CEPH_CAP_EXCL) {
dout(20, "lease_valid inode %p EXCL cap -> ICONTENT\n", inode);
havemask |= CEPH_LOCK_ICONTENT;
}
if (havemask & CEPH_LOCK_ICONTENT)
havemask |= CEPH_LOCK_ICONTENT;
- valid = time_before(jiffies, ci->i_lease_ttl);
+ if (ci->i_lease_session) {
+ struct ceph_mds_session *s = ci->i_lease_session;
+ spin_lock(&s->s_cap_lock);
+ if (s->s_cap_gen == ci->i_lease_gen &&
+ time_before(jiffies, s->s_cap_ttl) &&
+ time_before(jiffies, ci->i_lease_ttl))
+ valid = 1;
+ spin_unlock(&s->s_cap_lock);
+ }
spin_unlock(&inode->i_lock);
if (valid && (havemask & mask) == mask)
return;
spin_lock(&dentry->d_lock);
- if (dentry->d_time != 0 && time_before(ttl, dentry->d_time))
+ di = ceph_dentry(dentry);
+ if (dentry->d_time != 0 &&
+ di && di->lease_gen == session->s_cap_gen &&
+ time_before(ttl, dentry->d_time))
goto fail_unlock; /* we already have a newer lease. */
- di = ceph_dentry(dentry);
if (!di) {
spin_unlock(&dentry->d_lock);
di = kmalloc(sizeof(struct ceph_dentry_info),
di->dentry = dentry;
dentry->d_fsdata = di;
di->lease_session = session;
+ di->lease_gen = session->s_cap_gen;
list_add(&di->lease_item, &session->s_dentry_leases);
is_new = 1;
} else {
int ceph_dentry_lease_valid(struct dentry *dentry)
{
struct ceph_dentry_info *di;
+ struct ceph_mds_session *session;
int valid = 0;
+ u64 gen;
+ unsigned long ttl;
+
spin_lock(&dentry->d_lock);
di = ceph_dentry(dentry);
- if (di && time_before(jiffies, dentry->d_time))
- valid = 1;
+ if (di) {
+ session = di->lease_session;
+ spin_lock(&session->s_cap_lock);
+ gen = session->s_cap_gen;
+ ttl = session->s_cap_ttl;
+ spin_unlock(&session->s_cap_lock);
+
+ if (di->lease_gen == gen &&
+ time_before(jiffies, dentry->d_time) &&
+ time_before(jiffies, ttl))
+ valid = 1;
+ }
spin_unlock(&dentry->d_lock);
dout(20, "dentry_lease_valid - dentry %p = %d\n", dentry, valid);
return valid;
dout(10, "fill_trace reply has empty trace!\n");
return 0;
}
-
+
#if 0
/*
* if we resend completed ops to a recovering mds, we get no
/*
* if the dentry is unhashed AND we have a cap, stat
- * the ino directly. (if its unhashed and we don't have a
+ * the ino directly. (if it's unhashed and we don't have a
* cap, we may be screwed anyway.)
*/
if (d_unhashed(dentry)) {
names += len + 1;
p += len;
ceph_decode_32(&p, len);
- p += len;
+ p += len;
}
} else
names[0] = 0;
goto out;
}
kaddr = kmap(pages[i]);
- memcpy(kaddr, value + i*PAGE_CACHE_SIZE,
+ memcpy(kaddr, value + i*PAGE_CACHE_SIZE,
min(PAGE_CACHE_SIZE, size-i*PAGE_CACHE_SIZE));
}
}
session->s_state = CEPH_MDS_SESSION_OPEN;
spin_lock(&session->s_cap_lock);
session->s_cap_ttl = session->s_renew_requested +
- mdsc->mdsmap->m_cap_bit_timeout*HZ;
+ mdsc->mdsmap->m_session_timeout*HZ;
spin_unlock(&session->s_cap_lock);
complete(&session->s_completion);
break;
case CEPH_SESSION_RENEWCAPS:
spin_lock(&session->s_cap_lock);
session->s_cap_ttl = session->s_renew_requested +
- mdsc->mdsmap->m_cap_bit_timeout*HZ;
+ mdsc->mdsmap->m_session_timeout*HZ;
spin_unlock(&session->s_cap_lock);
dout(10, "session renewed caps from mds%d, ttl now %lu\n", mds,
session->s_cap_ttl);
struct ceph_msg *msg;
struct ceph_mds_lease *lease;
struct ceph_inode_info *ci;
+ struct ceph_dentry_info *di;
int origmask = mask;
int mds = -1;
int len = sizeof(*lease) + sizeof(__u32);
int dnamelen = 0;
- __u64 ino;
BUG_ON(inode == 0);
- if ((mask & CEPH_LOCK_DN) &&
- dentry && dentry->d_fsdata &&
- ceph_dentry(dentry)->lease_session->s_mds >= 0 &&
- time_before(jiffies, dentry->d_time)) {
- dnamelen = dentry->d_name.len;
- len += dentry->d_name.len;
- mds = ceph_dentry(dentry)->lease_session->s_mds;
+
+ /* is dentry lease valid? */
+ if ((mask & CEPH_LOCK_DN) && dentry) {
+ spin_lock(&dentry->d_lock);
+ di = ceph_dentry(dentry);
+ if (di &&
+ di->lease_session->s_mds >= 0 &&
+ di->lease_gen == di->lease_session->s_cap_gen &&
+ time_before(jiffies, dentry->d_time)) {
+ dnamelen = dentry->d_name.len;
+ len += dentry->d_name.len;
+ mds = di->lease_session->s_mds;
+ } else
+ mask &= ~CEPH_LOCK_DN; /* no lease; clear DN bit */
+ spin_unlock(&dentry->d_lock);
} else
- mask &= ~CEPH_LOCK_DN; /* nothing to release */
+ mask &= ~CEPH_LOCK_DN; /* no lease; clear DN bit */
+
+ /* inode lease? */
ci = ceph_inode(inode);
- ino = ci->i_ceph_ino;
+ spin_lock(&inode->i_lock);
if (ci->i_lease_session &&
- time_before(jiffies, ci->i_lease_ttl) &&
- ci->i_lease_session->s_mds >= 0) {
+ ci->i_lease_session->s_mds >= 0 &&
+ ci->i_lease_gen == ci->i_lease_session->s_cap_gen &&
+ time_before(jiffies, ci->i_lease_ttl)) {
mds = ci->i_lease_session->s_mds;
mask &= CEPH_LOCK_DN | ci->i_lease_mask; /* lease is valid */
ci->i_lease_mask &= ~mask;
} else
mask &= CEPH_LOCK_DN; /* no lease; clear all but DN bits */
+ spin_unlock(&inode->i_lock);
+
if (mask == 0) {
dout(10, "lease_release inode %p (%d) dentry %p -- "
"no lease on %d\n",
}
BUG_ON(mds < 0);
- dout(10, "lease_release inode %p dentry %p mask %d to mds%d\n", inode,
- dentry, mask, mds);
+ dout(10, "lease_release inode %p dentry %p %d mask %d to mds%d\n",
+ inode, dentry, dnamelen, mask, mds);
msg = ceph_msg_new(CEPH_MSG_CLIENT_LEASE, len, 0, 0, 0);
if (IS_ERR(msg))
lease = msg->front.iov_base;
lease->action = CEPH_MDS_LEASE_RELEASE;
lease->mask = mask;
- lease->ino = cpu_to_le64(ino); /* ?? */
+ lease->ino = cpu_to_le64(ceph_ino(inode));
*(__le32 *)(lease+1) = cpu_to_le32(dnamelen);
if (dentry)
memcpy((void *)(lease + 1) + 4, dentry->d_name.name, dnamelen);
int i;
struct ceph_mds_client *mdsc =
container_of(work, struct ceph_mds_client, delayed_work.work);
- int renew_interval = mdsc->mdsmap->m_cap_bit_timeout >> 2;
+ int renew_interval = mdsc->mdsmap->m_session_timeout >> 2;
int renew_caps = time_after_eq(jiffies, HZ*renew_interval +
mdsc->last_renew_caps);
u32 want_map = 0;
int s_mds;
int s_state;
unsigned long s_ttl; /* time until mds kills us */
- __u64 s_seq; /* incoming msg seq # */
+ u64 s_seq; /* incoming msg seq # */
struct mutex s_mutex;
spinlock_t s_cap_lock; /* protects s_cap_gen, s_cap_ttl */
- __u64 s_cap_gen; /* inc each time we get mds stale msg */
+ u64 s_cap_gen; /* inc each time we get mds stale msg */
unsigned long s_cap_ttl, s_renew_requested;
struct list_head s_caps;
struct list_head s_inode_leases, s_dentry_leases;
ceph_decode_32(p, m->m_created.tv_nsec);
ceph_decode_32(p, m->m_anchortable);
ceph_decode_32(p, m->m_root);
- ceph_decode_32(p, m->m_cap_bit_timeout);
+ ceph_decode_32(p, m->m_session_timeout);
ceph_decode_32(p, m->m_session_autoclose);
ceph_decode_32(p, m->m_max_mds);
struct ceph_timespec m_created;
__u32 m_anchortable;
__u32 m_root;
- __u32 m_cap_bit_timeout;
+ __u32 m_session_timeout;
__u32 m_session_autoclose;
__u32 m_max_mds; /* size of m_addr, m_state arrays */
struct ceph_entity_addr *m_addr; /* array of addresses */
int i_lease_mask;
struct ceph_mds_session *i_lease_session;
long unsigned i_lease_ttl; /* jiffies */
+ u64 i_lease_gen;
struct list_head i_lease_item; /* mds session list */
struct rb_root i_fragtree;
struct ceph_dentry_info {
struct dentry *dentry;
struct ceph_mds_session *lease_session;
+ u64 lease_gen; /* lease_session->s_cap_gen when the lease was recorded; lease is only valid while they still match */
struct list_head lease_item; /* mds session list */
};
}
}
+/*
+ * Drop every client lease attached to a session that has gone stale.
+ *
+ * For each lease on session->leases, ask the lease's parent cache object
+ * to remove it.
+ *
+ * NOTE(review): remove_client_lease() presumably unlinks the lease from
+ * session->leases (and may free it) -- confirm.  Because of that, the
+ * iterator is advanced past the current entry *before* the lease is
+ * removed, so we never step forward from an element that is no longer
+ * on the list.
+ */
+void Locker::remove_stale_leases(Session *session)
+{
+  dout(10) << "remove_stale_leases for " << session->inst.name << dendl;
+  xlist<ClientLease*>::iterator p = session->leases.begin();
+  while (!p.end()) {
+    ClientLease *l = *p;
+    ++p;  // step off 'l' now; it may leave the list below
+    MDSCacheObject *parent = l->parent;
+    dout(15) << " removing lease for " << l->mask << " on " << *parent << dendl;
+    parent->remove_client_lease(l, l->mask, this);
+  }
+}
+
+
class C_MDL_RequestInodeFileCaps : public Context {
Locker *locker;
CInode *in;
void issue_truncate(CInode *in);
void revoke_stale_caps(Session *session);
void resume_stale_caps(Session *session);
+ void remove_stale_leases(Session *session);
protected:
void handle_client_file_caps(class MClientFileCaps *m);
int32_t anchortable; // which MDS has anchortable (fixme someday)
int32_t root; // which MDS has root directory
- __u32 cap_bit_timeout;
+ __u32 session_timeout;
__u32 session_autoclose;
map<int32_t,int32_t> mds_state; // MDS state
public:
MDSMap() : epoch(0), client_epoch(0), last_failure(0), anchortable(0), root(0) {
// hack.. this doesn't really belong here
- cap_bit_timeout = (int)g_conf.mds_cap_timeout;
+ session_timeout = (int)g_conf.mds_session_timeout;
session_autoclose = (int)g_conf.mds_session_autoclose;
}
- utime_t get_cap_timeout() {
- return utime_t(cap_bit_timeout,0);
+ utime_t get_session_timeout() {
+ return utime_t(session_timeout,0); // session_timeout is in seconds; second ctor arg 0 is presumably the sub-second part -- confirm
}
epoch_t get_epoch() const { return epoch; }
::encode(created, bl);
::encode(anchortable, bl);
::encode(root, bl);
- ::encode(cap_bit_timeout, bl);
+ ::encode(session_timeout, bl);
::encode(session_autoclose, bl);
::encode(max_mds, bl);
::encode(mds_state, bl);
::decode(created, p);
::decode(anchortable, p);
::decode(root, p);
- ::decode(cap_bit_timeout, p);
+ ::decode(session_timeout, p);
::decode(session_autoclose, p);
::decode(max_mds, p);
::decode(mds_state, p);
{
dout(10) << "find_idle_sessions" << dendl;
- // stale
+ // timeout/stale
+ // (caps go stale, leases die)
utime_t now = g_clock.now();
utime_t cutoff = now;
- cutoff -= g_conf.mds_cap_timeout;
+ cutoff -= g_conf.mds_session_timeout;
while (1) {
Session *session = mds->sessionmap.get_oldest_session(Session::STATE_OPEN);
if (!session) break;
dout(10) << "new stale session " << session->inst << " last " << session->last_cap_renew << dendl;
mds->sessionmap.set_state(session, Session::STATE_STALE);
mds->locker->revoke_stale_caps(session);
+ mds->locker->remove_stale_leases(session);
mds->messenger->send_message(new MClientSession(CEPH_SESSION_STALE, session->get_push_seq()),
session->inst);
}
- // dead
+ // autoclose
cutoff = now;
cutoff -= g_conf.mds_session_autoclose;
while (1) {