}
/*
- * inode lease lock order is
- * inode->i_lock
- * session->s_cap_lock
+ * caller should hold session s_mutex.
*/
void ceph_update_inode_lease(struct inode *inode,
struct ceph_mds_reply_lease *lease,
ci->i_lease_ttl = ttl;
ci->i_lease_mask = le16_to_cpu(lease->mask);
if (ci->i_lease_session) {
- spin_lock(&ci->i_lease_session->s_cap_lock);
list_del(&ci->i_lease_item);
- spin_unlock(&ci->i_lease_session->s_cap_lock);
} else
is_new = 1;
ci->i_lease_session = session;
- spin_lock(&session->s_cap_lock);
list_add(&ci->i_lease_item, &session->s_inode_leases);
- spin_unlock(&session->s_cap_lock);
}
spin_unlock(&inode->i_lock);
if (is_new) {
&ci->vfs_inode, ci->i_lease_mask, ci->i_lease_mask & ~mask);
ci->i_lease_mask &= ~mask;
if (ci->i_lease_mask == 0) {
- spin_lock(&ci->i_lease_session->s_cap_lock);
list_del(&ci->i_lease_item);
- spin_unlock(&ci->i_lease_session->s_cap_lock);
ci->i_lease_session = 0;
drop = 1;
}
/*
- * dentry lease lock order is
- * dentry->d_lock
- * session->s_cap_lock
+ * caller should hold session s_mutex.
*/
void ceph_update_dentry_lease(struct dentry *dentry,
struct ceph_mds_reply_lease *lease,
dentry->d_time = ttl;
/* (re)add to session lru */
- if (!is_new && di->lease_session) {
- spin_lock(&di->lease_session->s_cap_lock);
+ if (!is_new && di->lease_session)
list_del(&di->lease_item);
- spin_unlock(&di->lease_session->s_cap_lock);
- }
di->lease_session = session;
- spin_lock(&session->s_cap_lock);
list_add(&di->lease_item, &session->s_dentry_leases);
- spin_unlock(&session->s_cap_lock);
spin_unlock(&dentry->d_lock);
if (is_new) {
di = ceph_dentry(dentry);
if (di) {
session = di->lease_session;
- spin_lock(&session->s_cap_lock);
list_del(&di->lease_item);
- spin_unlock(&session->s_cap_lock);
kfree(di);
drop = 1;
dentry->d_fsdata = 0;
}
/*
- * lock ordering is
- * inode->i_lock
- * session->s_cap_lock
+ * caller should hold session s_mutex.
*/
struct ceph_inode_cap *ceph_add_cap(struct inode *inode,
struct ceph_mds_session *session,
/* add to session cap list */
cap->session = session;
- spin_lock(&session->s_cap_lock);
list_add(&cap->session_caps, &session->s_caps);
session->s_nr_caps++;
- spin_unlock(&session->s_cap_lock);
}
dout(10, "add_cap inode %p (%llx) got cap %xh now %xh seq %d from %d\n",
return have;
}
+/*
+ * Unlink a cap from its session's cap list and from its inode's cap
+ * list, decrement the session's cap count, and free the cap.  The cap
+ * pointer must not be used after this returns.
+ *
+ * caller should hold i_lock and session s_mutex.
+ */
void __ceph_remove_cap(struct ceph_inode_cap *cap)
{
struct ceph_mds_session *session = cap->session;
dout(10, "__ceph_remove_cap %p from %p\n", cap, &cap->ci->vfs_inode);
/* remove from session list and account for it in s_nr_caps */
- spin_lock(&session->s_cap_lock);
list_del(&cap->session_caps);
session->s_nr_caps--;
- spin_unlock(&session->s_cap_lock);
/* remove from inode list, then release the cap's memory */
list_del(&cap->ci_caps);
kfree(cap);
}
+/*
+ * caller should hold session s_mutex.
+ */
void ceph_remove_cap(struct ceph_inode_cap *cap)
{
struct inode *inode = &cap->ci->vfs_inode;
__u64 size, max_size;
struct timespec mtime, atime;
int mds;
+ struct ceph_mds_session *session = 0; /* if non-NULL, i hold s_mutex */
retry:
spin_lock(&ci->vfs_inode.i_lock);
int revoking, dropping;
cap = list_entry(p, struct ceph_inode_cap, ci_caps);
+ /* note: no side-effects allowed, until we take s_mutex */
+ revoking = cap->implemented & ~cap->issued;
+
if (ci->i_wanted_max_size > ci->i_max_size &&
ci->i_wanted_max_size > ci->i_requested_max_size)
goto ack;
/* completed revocation? */
- revoking = cap->implemented & ~cap->issued;
- dout(20, "cap %p issued %d impl %d revoking %d used %d\n",
- cap, cap->issued, cap->implemented, revoking, used);
if (revoking && (revoking && used) == 0) {
dout(10, "completed revocation of %d\n",
cap->implemented & ~cap->issued);
- cap->implemented = cap->issued;
goto ack;
}
(ci->vfs_inode.i_size << 1) >= ci->i_max_size &&
(ci->i_reported_size << 1) < ci->i_max_size) {
dout(10, "i_size approaching max_size\n");
- ci->i_reported_size = ci->vfs_inode.i_size;
goto ack;
}
continue; /* nothing extra, all good */
ack:
+ /* take s_mutex, one way or another */
+ if (session && session != cap->session) {
+ dout(30, "oops, wrong session mutex\n");
+ up(&session->s_mutex);
+ session = 0;
+ }
+ if (!session) {
+ session = cap->session;
+ if (down_trylock(&session->s_mutex) != 0) {
+ dout(10, "inverting session/inode locking\n");
+ spin_unlock(&ci->vfs_inode.i_lock);
+ down(&session->s_mutex);
+ spin_unlock(&ci->vfs_inode.i_lock);
+ goto retry;
+ }
+ }
+
+ /* ok */
dropping = cap->issued & ~wanted;
+ if (dropping & CEPH_CAP_RDCACHE) {
+ dout(20, "invalidating pages on %p\n", &ci->vfs_inode);
+ invalidate_mapping_pages(&ci->vfs_inode.i_data, 0, -1);
+ }
cap->issued &= wanted; /* drop bits we don't want */
+ if (revoking && (revoking && used) == 0)
+ cap->implemented = cap->issued;
+
keep = cap->issued;
seq = cap->seq;
size = ci->vfs_inode.i_size;
+ ci->i_reported_size = size;
max_size = ci->i_wanted_max_size;
ci->i_requested_max_size = max_size;
mtime = ci->vfs_inode.i_mtime;
keep, wanted, seq,
size, max_size, &mtime, &atime, mds);
- if (dropping & CEPH_CAP_RDCACHE) {
- dout(20, "invalidating pages on %p\n", &ci->vfs_inode);
- invalidate_mapping_pages(&ci->vfs_inode.i_data, 0, -1);
- }
if (wanted == 0)
iput(&ci->vfs_inode); /* removed cap */
+ up(&session->s_mutex);
goto retry;
}
/* okay */
spin_unlock(&ci->vfs_inode.i_lock);
+
+ if (session)
+ up(&session->s_mutex);
}
void ceph_inode_set_size(struct inode *inode, loff_t size)
write_inode_now(&ci->vfs_inode, 0);
}
-
void apply_truncate(struct inode *inode, loff_t size)
{
struct ceph_inode_info *ci = ceph_inode(inode);
s->s_mds = mds;
s->s_state = CEPH_MDS_SESSION_NEW;
s->s_cap_seq = 0;
- spin_lock_init(&s->s_cap_lock);
+ init_MUTEX(&s->s_mutex);
INIT_LIST_HEAD(&s->s_caps);
INIT_LIST_HEAD(&s->s_inode_leases);
INIT_LIST_HEAD(&s->s_dentry_leases);
dout(10, "resume_session to mds%d\n", mds);
/* note cap staleness */
- spin_lock(&session->s_cap_lock);
list_for_each(cp, &session->s_caps) {
cap = list_entry(cp, struct ceph_inode_cap, session_caps);
cap->issued = cap->implemented = 0;
}
- spin_unlock(&session->s_cap_lock);
session->s_state = CEPH_MDS_SESSION_RESUMING;
* we don't deadlock with __remove_cap in inode.c.
*/
dout(10, "remove_session_caps on %p\n", session);
- spin_lock(&session->s_cap_lock);
while (session->s_nr_caps > 0) {
cap = list_entry(session->s_caps.next, struct ceph_inode_cap,
session_caps);
ci = cap->ci;
dout(10, "removing cap %p, ci is %p, inode is %p\n",
cap, ci, &ci->vfs_inode);
- spin_unlock(&session->s_cap_lock);
ceph_remove_cap(cap);
- spin_lock(&session->s_cap_lock);
}
BUG_ON(session->s_nr_caps > 0);
- spin_unlock(&session->s_cap_lock);
}
static void remove_session_leases(struct ceph_mds_session *session)
dout(10, "remove_session_leases on %p\n", session);
- spin_lock(&session->s_cap_lock);
-
/* inodes */
while (!list_empty(&session->s_inode_leases)) {
ci = list_entry(session->s_inode_leases.next,
struct ceph_inode_info, i_lease_item);
dout(10, "removing lease from inode %p\n", &ci->vfs_inode);
- spin_unlock(&session->s_cap_lock);
ceph_revoke_inode_lease(ci, ci->i_lease_mask);
- spin_lock(&session->s_cap_lock);
}
/* dentries */
di = list_entry(session->s_dentry_leases.next,
struct ceph_dentry_info, lease_item);
dout(10, "removing lease from dentry %p\n", di->dentry);
- spin_unlock(&session->s_cap_lock);
ceph_revoke_dentry_lease(di->dentry);
- spin_lock(&session->s_cap_lock);
}
-
- spin_unlock(&session->s_cap_lock);
}
void ceph_mdsc_handle_session(struct ceph_mds_client *mdsc,
{
__u32 op;
__u64 seq;
- struct ceph_mds_session *session;
+ struct ceph_mds_session *session = 0;
int mds = le32_to_cpu(msg->hdr.src.name.num);
struct ceph_mds_session_head *h = msg->front.iov_base;
/* handle */
spin_lock(&mdsc->lock);
session = __get_session(mdsc, mds);
+ down(&session->s_mutex);
+
dout(1, "handle_session %p op %d seq %llu\n", session, op, seq);
switch (op) {
case CEPH_SESSION_OPEN:
put_session(session);
spin_unlock(&mdsc->lock);
-out:
+ up(&session->s_mutex);
return;
bad:
dout(1, "corrupt session message, len %d, expected %d\n",
(int)msg->front.iov_len, (int)sizeof(*h));
- goto out;
+ return;
}
BUG_ON(req->r_reply);
spin_unlock(&mdsc->lock);
+ down(&req->r_session->s_mutex);
+
/* parse */
rinfo = &req->r_reply_info;
err = parse_reply_info(msg, rinfo);
}
done:
+ up(&req->r_session->s_mutex);
spin_lock(&mdsc->lock);
if (err) {
req->r_reply = ERR_PTR(err);
session->s_state = CEPH_MDS_SESSION_RECONNECTING;
/* estimate needed space */
- spin_lock(&session->s_cap_lock);
len += session->s_nr_caps *
sizeof(struct ceph_mds_cap_reconnect);
len += session->s_nr_caps * (100); /* guess! */
dout(40, "estimating i need %d bytes for %d caps\n",
len, session->s_nr_caps);
- spin_unlock(&session->s_cap_lock);
} else {
dout(20, "no session for mds%d, will send short reconnect\n",
mds);
spin_unlock(&mdsc->lock); /* drop lock for duration */
+ if (session)
+ down(&session->s_mutex);
+
retry:
/* build reply */
reply = ceph_msg_new(CEPH_MSG_CLIENT_RECONNECT, len, 0, 0, 0);
dout(10, "session %p state %d\n", session, session->s_state);
/* traverse this session's caps */
- spin_lock(&session->s_cap_lock);
ceph_encode_8(&p, 0);
ceph_encode_32(&p, session->s_nr_caps);
count = 0;
dput(dentry);
count++;
}
- spin_unlock(&session->s_cap_lock);
send:
reply->front.iov_len = p - reply->front.iov_base;
dout(0, "WARNING: reconnect on %p raced and lost?\n",
session);
}
+ }
+out:
+ if (session) {
+ up(&session->s_mutex);
put_session(session);
}
return;
dout(30, "i guessed %d, and did %d of %d, retrying with %d\n",
len, count, session->s_nr_caps, newlen);
len = newlen;
- spin_unlock(&session->s_cap_lock);
ceph_msg_put(reply);
goto retry;
derr(0, "error %d generating reconnect. what to do?\n", err);
/* fixme */
BUG_ON(1);
+ goto out;
}
/*
max_size = le64_to_cpu(h->max_size);
/* find session */
+ spin_lock(&mdsc->lock);
session = __get_session(&client->mdsc, mds);
+ spin_unlock(&mdsc->lock);
if (!session) {
dout(10, "WTF, got filecap but no session for mds%d\n", mds);
return;
}
+
+ down(&session->s_mutex);
session->s_cap_seq++;
/* lookup ino */
#endif
dout(20, "op is %d, ino %llx %p\n", op, ino, inode);
- if (inode && ceph_ino(inode) != ino) {
- BUG_ON(sizeof(ino_t) >= sizeof(u64));
- dout(10, "UH OH, lame ceph ino %llx -> %lu ino_t hash collided?"
- " inode is %llx\n", ino, inot, ceph_ino(inode));
- inode = 0;
- }
-
if (!inode) {
dout(10, "wtf, i don't have ino %lu=%llx? closing out cap\n",
inot, ino);
ceph_mdsc_send_cap_ack(mdsc, ino, 0, 0, seq,
size, 0, 0, 0, mds);
- return;
+ goto no_inode;
}
switch (op) {
}
iput(inode);
+no_inode:
+ up(&session->s_mutex);
return;
+
bad:
dout(10, "corrupt filecaps message\n");
return;
dname.len = msg->front.iov_len - sizeof(*h) - sizeof(__u32);
/* find session */
+ spin_lock(&mdsc->lock);
session = __get_session(mdsc, mds);
+ spin_unlock(&mdsc->lock);
if (!session) {
dout(10, "WTF, got lease but no session for mds%d\n", mds);
return;
}
session->s_cap_seq++;
+ down(&session->s_mutex);
+
/* lookup inode */
inot = ceph_ino_to_ino(ino);
#if BITS_PER_LONG == 64
h->action = CEPH_MDS_LEASE_RELEASE;
ceph_msg_get(msg);
send_msg_mds(mdsc, msg, mds);
+ up(&session->s_mutex);
return;
bad: