From 85a3a2f1ae281e3de6116f770071b60057024d0a Mon Sep 17 00:00:00 2001
From: Sage Weil
Date: Fri, 28 Mar 2008 16:29:18 -0700
Subject: [PATCH] kclient: keep leases in per-session lru lists.

locking needs some work still
---
 src/TODO                |   6 +-
 src/kernel/dir.c        |  22 +++++--
 src/kernel/inode.c      | 138 +++++++++++++++++++++++++++++++++++-----
 src/kernel/mds_client.c |  50 +++++++++++++--
 src/kernel/mds_client.h |   1 +
 src/kernel/super.c      |   3 +-
 src/kernel/super.h      |  29 +++++++--
 7 files changed, 214 insertions(+), 35 deletions(-)

diff --git a/src/TODO b/src/TODO
index 1a0c672f0f247..ce8cfbadb4a6b 100644
--- a/src/TODO
+++ b/src/TODO
@@ -27,7 +27,11 @@ userspace client
 - reference count lease validations on path lookup?
 
 kernel client
--- link leased inode/dentries into mds_session
+- use list_for_each_safe for caps removal?
+  - revisit cap removal locking, make sure it's okay....
+- fix dentry locking on lease addition
+- trim expired leases so we don't indefinitely hold dcache refs...
+- move readdir result prepopulation inside reply handler (not in caller context!)
 - carry wrbuffer/rdcache caps until data is flushed
   - this should make the utimes bit kick in
 - make sure link/unlink results reflected by inode/dentry cache (let fill_trace do it?  invalidate?  do actual update?)
diff --git a/src/kernel/dir.c b/src/kernel/dir.c
index ebcc99077d5d8..d418e104437ec 100644
--- a/src/kernel/dir.c
+++ b/src/kernel/dir.c
@@ -97,7 +97,8 @@ static unsigned fpos_off(loff_t p)
 
 static int prepopulate_dir(struct dentry *parent,
 			   struct ceph_mds_reply_info *rinfo,
-			   int from_mds, unsigned long from_time)
+			   struct ceph_mds_session *session,
+			   unsigned long from_time)
 {
 	struct qstr dname;
 	struct dentry *dn;
@@ -123,7 +124,7 @@ static int prepopulate_dir(struct dentry *parent,
 			ceph_init_dentry(dn);
 		}
 		ceph_update_dentry_lease(dn, rinfo->dir_dlease[i],
-					 from_mds, from_time);
+					 session, from_time);
 
 		/* inode */
 		if (dn->d_inode == NULL) {
@@ -156,7 +157,7 @@ static int prepopulate_dir(struct dentry *parent,
 				return -1;
 			}
 		}
-		ceph_update_inode_lease(in, rinfo->dir_ilease[i], from_mds,
+		ceph_update_inode_lease(in, rinfo->dir_ilease[i], session,
 					from_time);
 
 		dput(dn);
@@ -206,7 +207,7 @@ nextfrag:
 
 		/* pre-populate dentry cache */
 		prepopulate_dir(filp->f_dentry, &req->r_reply_info,
-				le32_to_cpu(req->r_reply->hdr.src.name.num),
+				req->r_session,
 				req->r_from_time);
 	}
 
@@ -570,7 +571,7 @@ static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd)
 	     mds, jiffies);
 
 	/* does dir inode lease or cap cover it? */
-	if (dirci->i_lease_mds >= 0 &&
+	if (dirci->i_lease_session &&
 	    time_after(dirci->i_lease_ttl, jiffies) &&
 	    (dirci->i_lease_mask & CEPH_LOCK_ICONTENT)) {
 		dout(20, "d_revalidate have ICONTENT on dir inode %p, ok\n",
@@ -594,6 +595,25 @@ static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd)
 	return 0;
 }
 
+/*
+ * Free the per-dentry lease state, unlinking it from the session's
+ * lease list (under s_cap_lock) if it is still linked there.
+ */
+static void ceph_d_release(struct dentry *dentry)
+{
+	struct ceph_dentry_info *di = ceph_dentry(dentry);
+
+	if (!di)
+		return;
+	if (di->lease_session) {
+		spin_lock(&di->lease_session->s_cap_lock);
+		list_del(&di->lease_item);
+		spin_unlock(&di->lease_session->s_cap_lock);
+	}
+	dentry->d_fsdata = NULL;
+	kfree(di);
+}
+
 
 const struct inode_operations ceph_dir_iops = {
 	.lookup = ceph_dir_lookup,
@@ -611,5 +631,6 @@ const struct inode_operations ceph_dir_iops = {
 
 struct dentry_operations ceph_dentry_ops = {
 	.d_revalidate = ceph_d_revalidate,
+	.d_release = ceph_d_release,
 };
 
diff --git a/src/kernel/inode.c b/src/kernel/inode.c
index 245cec89f363d..b7d499ece0e2b 100644
--- a/src/kernel/inode.c
+++ b/src/kernel/inode.c
@@ -27,7 +27,8 @@ int ceph_get_inode(struct super_block *sb, __u64 ino, struct inode **pinode)
 #if BITS_PER_LONG == 64
 	*pinode = iget_locked(sb, ino);
 #else
-	*pinode = iget5_locked(sb, inot, ceph_ino_compare, ceph_set_ino_cb, &ino);
+	*pinode = iget5_locked(sb, inot, ceph_ino_compare, ceph_set_ino_cb,
+			       &ino);
 #endif
 	if (*pinode == NULL)
 		return -ENOMEM;
@@ -40,7 +41,8 @@ int ceph_get_inode(struct super_block *sb, __u64 ino, struct inode **pinode)
 #endif
 	ci->i_hashval = (*pinode)->i_ino;
 
-	dout(30, "get_inode on %lu=%llx got %p\n", (*pinode)->i_ino, ino, *pinode);
+	dout(30, "get_inode on %lu=%llx got %p\n", (*pinode)->i_ino, ino,
+	     *pinode);
 	return 0;
 }
 
@@ -153,9 +155,18 @@ int ceph_fill_inode(struct inode *inode, struct ceph_mds_reply_inode *info)
 	return 0;
 }
 
+/*
+ * Record an inode lease granted by @session.  If the new lease expires
+ * later than the one we hold, adopt its ttl and mask and move the inode
+ * onto @session's s_inode_leases list.
+ *
+ * inode lease lock order is
+ *     inode->i_lock
+ *         session->s_cap_lock
+ */
 void ceph_update_inode_lease(struct inode *inode,
 			     struct ceph_mds_reply_lease *lease,
-			     int from_mds,
+			     struct ceph_mds_session *session,
 			     unsigned long from_time)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
@@ -167,19 +178,63 @@ void ceph_update_inode_lease(struct inode *inode,
 	dout(10, "update_inode_lease %p mask %d duration %d ms ttl %llu\n",
 	     inode, le16_to_cpu(lease->mask), le32_to_cpu(lease->duration_ms),
 	     ttl);
+
+	if (lease->mask == 0)
+		return;
+
+	spin_lock(&inode->i_lock);
 	if (ttl > ci->i_lease_ttl) {
 		ci->i_lease_ttl = ttl;
 		ci->i_lease_mask = le16_to_cpu(lease->mask);
-		ci->i_lease_mds = from_mds;
+		if (ci->i_lease_session) {
+			spin_lock(&ci->i_lease_session->s_cap_lock);
+			list_del(&ci->i_lease_item);
+			spin_unlock(&ci->i_lease_session->s_cap_lock);
+		}
+		ci->i_lease_session = session;
+		spin_lock(&session->s_cap_lock);
+		list_add(&ci->i_lease_item, &session->s_inode_leases);
+		spin_unlock(&session->s_cap_lock);
 	}
+	spin_unlock(&inode->i_lock);
 }
 
+/*
+ * Drop the lease bits in @mask.  Once no bits remain, unlink the inode
+ * from its session's lease list and forget the session.
+ */
+void ceph_revoke_inode_lease(struct ceph_inode_info *ci, int mask)
+{
+	spin_lock(&ci->vfs_inode.i_lock);
+	ci->i_lease_mask &= ~mask;
+	/* i_lease_session is NULL when the inode holds no lease */
+	if (ci->i_lease_mask == 0 && ci->i_lease_session) {
+		spin_lock(&ci->i_lease_session->s_cap_lock);
+		list_del(&ci->i_lease_item);
+		spin_unlock(&ci->i_lease_session->s_cap_lock);
+		ci->i_lease_session = NULL;
+	}
+	spin_unlock(&ci->vfs_inode.i_lock);
+}
+
+/*
+ * Record a dentry lease granted by @session: update d_time and move the
+ * dentry's ceph_dentry_info onto @session's s_dentry_leases list.  The
+ * first lease on a dentry allocates the info and takes a dentry ref,
+ * which ceph_revoke_dentry_lease() drops.
+ *
+ * dentry lease lock order is
+ *     dentry->d_lock
+ *         session->s_cap_lock
+ */
 void ceph_update_dentry_lease(struct dentry *dentry,
 			      struct ceph_mds_reply_lease *lease,
-			      int from_mds,
+			      struct ceph_mds_session *session,
 			      unsigned long from_time)
 {
 	__u64 ttl = le32_to_cpu(lease->duration_ms) * HZ;
+	struct ceph_dentry_info *di;
+	int is_new = 0;
 
 	do_div(ttl, 1000);
 	ttl += from_time;
@@ -187,18 +242,85 @@ void ceph_update_dentry_lease(struct dentry *dentry,
 	dout(10, "update_dentry_lease %p mask %d duration %d ms ttl %llu\n",
 	     dentry, le16_to_cpu(lease->mask), le32_to_cpu(lease->duration_ms),
 	     ttl);
-	if (lease->mask) {
-		if (ttl > dentry->d_time) {
-			dentry->d_time = ttl;
-			dentry->d_fsdata = (void *)(long)from_mds;
+	if (lease->mask == 0)
+		return;
+
+	spin_lock(&dentry->d_lock);
+	if (ttl < dentry->d_time)
+		goto fail_unlock;  /* older. */
+
+	di = ceph_dentry(dentry);
+	if (!di) {
+		spin_unlock(&dentry->d_lock);
+		di = kmalloc(sizeof(struct ceph_dentry_info),
+			     GFP_KERNEL);
+		if (!di)
+			return;          /* oh well */
+		spin_lock(&dentry->d_lock);
+		if (dentry->d_fsdata) { /* lost a race! */
+			kfree(di);
+			goto fail_unlock;
 		}
-	} else {
-		dentry->d_fsdata = (void *)(long)-1; /* invalidate */
+		is_new = 1;
+		di->dentry = dentry;
+		/* not on any session list yet */
+		di->lease_session = NULL;
+		dentry->d_fsdata = di;
 	}
+
+	/* update */
+	dentry->d_time = ttl;
+
+	/* (re)add to session lru */
+	if (di->lease_session) {
+		spin_lock(&di->lease_session->s_cap_lock);
+		list_del(&di->lease_item);
+		spin_unlock(&di->lease_session->s_cap_lock);
+	}
+	di->lease_session = session;
+	spin_lock(&session->s_cap_lock);
+	list_add(&di->lease_item, &session->s_dentry_leases);
+	spin_unlock(&session->s_cap_lock);
+
+	spin_unlock(&dentry->d_lock);
+	if (is_new)
+		dget(dentry);
+	return;
+
+fail_unlock:
+	spin_unlock(&dentry->d_lock);
 }
 
+/*
+ * Tear down a dentry's lease state: unlink it from the session's lease
+ * list, free it, and drop the dentry ref taken when it was attached.
+ */
+void ceph_revoke_dentry_lease(struct dentry *dentry)
+{
+	struct ceph_dentry_info *di;
+	struct ceph_mds_session *session;
+	int drop = 0;
+
+	/* detach from dentry */
+	spin_lock(&dentry->d_lock);
+	dentry->d_time = 0;  /* lease is no longer valid */
+	di = ceph_dentry(dentry);
+	if (di) {
+		session = di->lease_session;
+		spin_lock(&session->s_cap_lock);
+		list_del(&di->lease_item);
+		spin_unlock(&session->s_cap_lock);
+		kfree(di);
+		drop = 1;
+		dentry->d_fsdata = NULL;
+	}
+	spin_unlock(&dentry->d_lock);
+	if (drop)
+		dput(dentry);
+}
+
 int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
-		    int mds)
+		    struct ceph_mds_session *session)
 {
 	struct ceph_mds_reply_info *rinfo = &req->r_reply_info;
 	int err = 0;
@@ -233,7 +355,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
 		err = ceph_fill_inode(in, rinfo->trace_in[0].in);
 		if (err < 0)
 			return err;
-		ceph_update_inode_lease(in, rinfo->trace_ilease[0], mds,
+		ceph_update_inode_lease(in, rinfo->trace_ilease[0], session,
 					req->r_from_time);
 
 		if (sb->s_root == NULL)
@@ -272,7 +394,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
 				}
 			}
 			ceph_update_dentry_lease(dn, rinfo->trace_dlease[d],
-						 mds, req->r_from_time);
+						 session, req->r_from_time);
 
 			/* inode */
 			if (d+1 == rinfo->trace_numi) {
@@ -320,7 +442,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
 				}
 			}
 			ceph_update_inode_lease(dn->d_inode, rinfo->trace_ilease[d+1],
-						mds, req->r_from_time);
+						session, req->r_from_time);
 			dput(parent);
 			parent = NULL;
 		}
@@ -838,6 +960,7 @@ int ceph_inode_revalidate(struct inode *inode, int mask)
 	} else {
 		dout(10, "inode_revalidate %p have %d want %d, lease expired\n",
 		     inode, havemask, mask);
+		ceph_revoke_inode_lease(ci, mask);
 	}
 	return ceph_do_lookup(inode->i_sb, d_find_alias(inode), mask);
 }
diff --git a/src/kernel/mds_client.c b/src/kernel/mds_client.c
index dcf497a876add..ade40e229c201 100644
--- a/src/kernel/mds_client.c
+++ b/src/kernel/mds_client.c
@@ -289,6 +289,8 @@ __register_session(struct ceph_mds_client *mdsc, int mds)
 	s->s_cap_seq = 0;
 	spin_lock_init(&s->s_cap_lock);
 	INIT_LIST_HEAD(&s->s_caps);
+	INIT_LIST_HEAD(&s->s_inode_leases);
+	INIT_LIST_HEAD(&s->s_dentry_leases);
 	s->s_nr_caps = 0;
 	atomic_set(&s->s_ref, 1);
 	init_completion(&s->s_completion);
@@ -617,6 +619,43 @@ static void remove_session_caps(struct ceph_mds_session *session)
 	spin_unlock(&session->s_cap_lock);
 }
 
+/*
+ * Revoke every inode and dentry lease still linked to @session.
+ * s_cap_lock is dropped around each revoke call because the revoke
+ * helpers take i_lock/d_lock first and s_cap_lock inside that.
+ */
+static void remove_session_leases(struct ceph_mds_session *session)
+{
+	struct ceph_inode_info *ci;
+	struct ceph_dentry_info *di;
+
+	dout(10, "remove_session_leases on %p\n", session);
+
+	spin_lock(&session->s_cap_lock);
+
+	/* inodes */
+	while (!list_empty(&session->s_inode_leases)) {
+		ci = list_entry(session->s_inode_leases.next,
+				struct ceph_inode_info, i_lease_item);
+		dout(10, "removing lease from inode %p\n", &ci->vfs_inode);
+		spin_unlock(&session->s_cap_lock);
+		ceph_revoke_inode_lease(ci, ci->i_lease_mask);
+		spin_lock(&session->s_cap_lock);
+	}
+
+	/* dentries */
+	while (!list_empty(&session->s_dentry_leases)) {
+		di = list_entry(session->s_dentry_leases.next,
+				struct ceph_dentry_info, lease_item);
+		dout(10, "removing lease from dentry %p\n", di->dentry);
+		spin_unlock(&session->s_cap_lock);
+		ceph_revoke_dentry_lease(di->dentry);
+		spin_lock(&session->s_cap_lock);
+	}
+
+	spin_unlock(&session->s_cap_lock);
+}
+
 void ceph_mdsc_handle_session(struct ceph_mds_client *mdsc,
 			      struct ceph_msg *msg)
 {
@@ -656,6 +695,7 @@ void ceph_mdsc_handle_session(struct ceph_mds_client *mdsc,
 			     seq, session->s_cap_seq);
 		}
 		remove_session_caps(session);
+		remove_session_leases(session);
 		complete(&mdsc->session_close_waiters);
 		break;
 
@@ -892,7 +932,7 @@ void ceph_mdsc_handle_reply(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
 	dout(10, "handle_reply tid %lld result %d\n", tid, result);
 
 	/* insert trace into our cache */
-	err = ceph_fill_trace(mdsc->client->sb, req, mds);
+	err = ceph_fill_trace(mdsc->client->sb, req, req->r_session);
 	if (err)
 		goto done;
 	if (result == 0 && req->r_expects_cap) {
@@ -1416,7 +1456,7 @@ void ceph_mdsc_handle_lease(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
 	/* inode */
 	ci = ceph_inode(inode);
 	if (mask & ci->i_lease_mask) {
-		ci->i_lease_mask &= ~mask;
+		ceph_revoke_inode_lease(ci, mask);
 		dout(10, "lease mask %d revoked on inode %p, still have %d\n",
 		     mask, inode, ci->i_lease_mask);
 	}
@@ -1432,7 +1472,7 @@ void ceph_mdsc_handle_lease(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
 		dentry = d_lookup(parent, &dname);
 		if (!dentry)
 			goto release;
-		dentry->d_time = 0;
+		ceph_revoke_dentry_lease(dentry);
 		dout(10, "lease revoked on dentry %p\n", dentry);
 	}
 
@@ -1457,6 +1497,7 @@ void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc, struct inode *inode,
 	struct ceph_msg *msg;
 	struct ceph_mds_lease *lease;
 	struct ceph_inode_info *ci;
+	struct ceph_dentry_info *di;
 	int origmask = mask;
 	int mds = -1;
 	int len = sizeof(*lease) + sizeof(__u32);
@@ -1464,8 +1505,9 @@ void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc, struct inode *inode,
 	__u64 ino;
 
 	BUG_ON(inode == 0);
-	if ((mask & CEPH_LOCK_DN) && dentry) {
-		mds = (long)dentry->d_fsdata;
+	if ((mask & CEPH_LOCK_DN) && dentry && dentry->d_fsdata) {
+		di = ceph_dentry(dentry);
+		mds = di->lease_session->s_mds;
 		if (mds >= 0 && time_before(jiffies, dentry->d_time)) {
 			dnamelen = dentry->d_name.len;
 			len += dentry->d_name.len;
@@ -1474,9 +1516,9 @@ void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc, struct inode *inode,
 	}
 	ci = ceph_inode(inode);
 	ino = ci->i_ceph_ino;
-	if (ci->i_lease_mds >= 0 && time_after(ci->i_lease_ttl, jiffies)) {
+	if (ci->i_lease_session && time_after(ci->i_lease_ttl, jiffies)) {
 		mask &= ci->i_lease_mask;  /* lease is valid */
-		mds = ci->i_lease_mds;
+		mds = ci->i_lease_session->s_mds;
 	} else
 		mask &= CEPH_LOCK_DN;  /* no lease; clear all but DN bits */
 	if (mask == 0) {
diff --git a/src/kernel/mds_client.h b/src/kernel/mds_client.h
index 2d2172dd76796..479d3a427a988 100644
--- a/src/kernel/mds_client.h
+++ b/src/kernel/mds_client.h
@@ -57,6 +57,7 @@ struct ceph_mds_session {
 	__u64             s_cap_seq;    /* cap message count/seq from mds */
 	spinlock_t        s_cap_lock;
 	struct list_head  s_caps;
+	struct list_head  s_inode_leases, s_dentry_leases;
 	int               s_nr_caps;
 	atomic_t          s_ref;
 	struct completion s_completion;
diff --git a/src/kernel/super.c b/src/kernel/super.c
index 1bf5e51aae3d9..43428778dc3f5 100644
--- a/src/kernel/super.c
+++ b/src/kernel/super.c
@@ -108,9 +108,10 @@ static struct inode *ceph_alloc_inode(struct super_block *sb)
 
 	ci->i_symlink = 0;
 
-	ci->i_lease_mds = -1;
+	ci->i_lease_session = NULL;
 	ci->i_lease_mask = 0;
 	ci->i_lease_ttl = 0;
+	INIT_LIST_HEAD(&ci->i_lease_item);
 
 	ci->i_fragtree = ci->i_fragtree_static;
 	ci->i_fragtree->nsplits = 0;
diff --git a/src/kernel/super.h b/src/kernel/super.h
index 25b768843f3f6..1a0d0f754c82a 100644
--- a/src/kernel/super.h
+++ b/src/kernel/super.h
@@ -147,8 +147,10 @@ struct ceph_inode_info {
 
 	char *i_symlink;
 
-	int i_lease_mask, i_lease_mds;
+	int i_lease_mask;
+	struct ceph_mds_session *i_lease_session;
 	long unsigned i_lease_ttl;  /* jiffies */
+	struct list_head i_lease_item; /* mds session list */
 
 	struct ceph_frag_tree_head *i_fragtree, i_fragtree_static[1];
 	int i_frag_map_nr;
@@ -175,6 +177,21 @@ static inline struct ceph_inode_info *ceph_inode(struct inode *inode)
 	return list_entry(inode, struct ceph_inode_info, vfs_inode);
 }
 
+/* per-dentry lease state, hung off dentry->d_fsdata */
+struct ceph_dentry_info {
+	struct dentry *dentry;
+	struct ceph_mds_session *lease_session;
+	struct list_head lease_item; /* mds session list */
+};
+
+static inline struct ceph_dentry_info *ceph_dentry(struct dentry *dentry)
+{
+	return (struct ceph_dentry_info *)dentry->d_fsdata;
+}
+
+extern void ceph_revoke_inode_lease(struct ceph_inode_info *ci, int mask);
+extern void ceph_revoke_dentry_lease(struct dentry *dentry);
+
 /*
  * ino_t is <64 bits on many architectures... blech
  */
@@ -184,7 +201,6 @@ static inline ino_t ceph_ino_to_ino(u64 cephino)
 #if BITS_PER_LONG == 32
 	ino ^= cephino >> (sizeof(u64)-sizeof(ino_t)) * 8;
 #endif
-
 	return ino;
 }
 
@@ -198,7 +214,6 @@ static inline void ceph_set_ino(struct inode *inode, __u64 ino)
 static inline int ceph_set_ino_cb(struct inode *inode, void *data)
 {
 	ceph_set_ino(inode, *(__u64 *)data);
-
 	return 0;
 }
 
@@ -319,10 +334,12 @@ extern int ceph_fill_inode(struct inode *inode,
 
 extern void ceph_update_inode_lease(struct inode *inode,
 				    struct ceph_mds_reply_lease *lease,
-				    int from_mds, unsigned long from_time);
+				    struct ceph_mds_session *session,
+				    unsigned long from_time);
 extern void ceph_update_dentry_lease(struct dentry *dentry,
 				     struct ceph_mds_reply_lease *lease,
-				     int from_mds, unsigned long from_time);
+				     struct ceph_mds_session *session,
+				     unsigned long from_time);
 
 extern struct ceph_inode_cap *ceph_find_cap(struct inode *inode, int want);
 extern struct ceph_inode_cap *ceph_add_cap(struct inode *inode,
@@ -364,7 +381,8 @@ extern struct dentry_operations ceph_dentry_ops;
 
 extern char *ceph_build_dentry_path(struct dentry *dentry, int *len);
 extern int ceph_fill_trace(struct super_block *sb,
-			   struct ceph_mds_request *req, int mds);
+			   struct ceph_mds_request *req,
+			   struct ceph_mds_session *session);
 extern int ceph_do_lookup(struct super_block *sb, struct dentry *dentry, int m);
 static inline void ceph_init_dentry(struct dentry *dentry)
 {
-- 
2.39.5