From 9c7916891148e8bd5e2d1e718e93d626b2f3f093 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 4 Mar 2009 15:43:32 -0800 Subject: [PATCH] kclient: use helper to clearly define which caps belong on rdcaps Adjust list membership in ceph_add_cap, release, flush_ack, and ceph_put_fmode paths. Use a spinlock to protect list adjustments because ceph_put_fmode does not have the session mutex. Make __cap_is_valid() check whether we are on the rdcaps list. Remove unneeded extra checks in rdcaps trimmer; replace with a WARN. --- src/kernel/caps.c | 127 ++++++++++++++++++++++++++++++++-------- src/kernel/inode.c | 22 ------- src/kernel/mds_client.c | 1 + src/kernel/mds_client.h | 1 + src/kernel/super.h | 14 ++--- 5 files changed, 110 insertions(+), 55 deletions(-) diff --git a/src/kernel/caps.c b/src/kernel/caps.c index 5f5b92f210997..18c7417e5a23d 100644 --- a/src/kernel/caps.c +++ b/src/kernel/caps.c @@ -165,6 +165,40 @@ static void __insert_cap_node(struct ceph_inode_info *ci, rb_insert_color(&new->ci_node, &ci->i_caps); } +/* + * Remove or place the given cap on the session 'rdcaps' list (or + * read-only, expireable caps). A cap belongs on the rdcaps list IFF + * it does not include any non-expireable caps. + * + * The rdcaps list is protected by a separate spinlock because + * ceph_put_fmode may need to add caps to it when wanted goes to 0. + */ +static void __adjust_cap_rdcaps_listing(struct ceph_inode_info *ci, + struct ceph_cap *cap, + int wanted) +{ + int caps = cap->issued | cap->flushing | ci->i_dirty_caps; + + spin_lock(&cap->session->s_rdcaps_lock); + if ((caps & ~CEPH_CAP_EXPIREABLE) == 0 && + wanted == 0) { + /* move to tail of session rdcaps lru? 
*/ + if (!list_empty(&cap->session_rdcaps)) + list_del_init(&cap->session_rdcaps); + else + dout(20, "adjust_cap_rdcaps_listing %p added %p\n", + &ci->vfs_inode, cap); + list_add_tail(&cap->session_rdcaps, &cap->session->s_rdcaps); + } else { + if (!list_empty(&cap->session_rdcaps)) { + dout(20, "adjust_cap_rdcaps_listing %p removed %p\n", + &ci->vfs_inode, cap); + list_del_init(&cap->session_rdcaps); + } + } + spin_unlock(&cap->session->s_rdcaps_lock); +} + /* * Add a capability under the given MDS session, after processing * the snapblob (to update the snap realm hierarchy). @@ -190,6 +224,14 @@ int ceph_add_cap(struct inode *inode, dout(10, "add_cap %p mds%d cap %llx %s seq %d\n", inode, session->s_mds, cap_id, ceph_cap_string(issued), seq); + + /* + * If we are opening the file, include file mode wanted bits + * in wanted. Needed by adjust_cap_rdcaps_listing. + */ + if (fmode >= 0) + wanted |= ceph_caps_for_mode(fmode); + retry: spin_lock(&inode->i_lock); cap = __get_cap_for_mds(inode, mds); @@ -228,11 +270,7 @@ retry: INIT_LIST_HEAD(&cap->session_rdcaps); } - /* move to tail of session rdcaps lru? 
*/ - if (!list_empty(&cap->session_rdcaps)) - list_del(&cap->session_rdcaps); - if ((cap->issued & ~CEPH_CAP_EXPIREABLE) == 0) - list_add_tail(&cap->session_rdcaps, &session->s_rdcaps); + __adjust_cap_rdcaps_listing(ci, cap, __ceph_caps_wanted(ci) | wanted); if (!ci->i_snap_realm) { struct ceph_snap_realm *realm = ceph_get_snap_realm(mdsc, @@ -258,8 +296,8 @@ retry: ci->i_ceph_flags &= ~CEPH_I_COMPLETE; } - dout(10, "add_cap inode %p (%llx.%llx) cap %s now %s seq %d mds%d\n", - inode, ceph_vinop(inode), ceph_cap_string(issued), + dout(10, "add_cap inode %p (%llx.%llx) cap %p %s now %s seq %d mds%d\n", + inode, ceph_vinop(inode), cap, ceph_cap_string(issued), ceph_cap_string(issued|cap->issued), seq, mds); cap->cap_id = cap_id; cap->issued = issued; @@ -300,9 +338,9 @@ static int __cap_is_valid(struct ceph_cap *cap) } if (time_after_eq(jiffies, cap->expires) && - (cap->issued & ~CEPH_CAP_EXPIREABLE) == 0) { + !list_empty(&cap->session_rdcaps)) { dout(30, "__cap_is_valid %p cap %p issued %s " - "but readonly and expired\n", &cap->ci->vfs_inode, + "but rdcap and expired\n", &cap->ci->vfs_inode, cap, ceph_cap_string(cap->issued)); return 0; } @@ -618,7 +656,8 @@ static void __send_cap(struct ceph_mds_client *mdsc, /* close out cap? */ if (flushing == 0 && keep == 0) last_cap = __ceph_remove_cap(cap); - + else + __adjust_cap_rdcaps_listing(ci, cap, __ceph_caps_wanted(ci)); spin_unlock(&inode->i_lock); if (dropping & CEPH_CAP_FILE_RDCACHE) { @@ -1418,6 +1457,8 @@ static void handle_cap_flush_ack(struct inode *inode, if (removed_last) __cap_delay_cancel(&ceph_inode_to_client(inode)->mdsc, ci); + } else { + __adjust_cap_rdcaps_listing(ci, cap, __ceph_caps_wanted(ci)); } spin_unlock(&inode->i_lock); if (removed_last) @@ -1779,20 +1820,25 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc) /* - * caller must hold session s_mutex + * Caller must hold session s_mutex. 
+ * + * Note that _removals_ to s_rdcaps are protected by s_mutex and + * s_rdcaps_lock spinlock, but additions are protected only by + * s_rdcaps_lock (see ceph_put_fmode). */ void ceph_trim_session_rdcaps(struct ceph_mds_session *session) { struct inode *inode; struct ceph_cap *cap; struct list_head *p, *n; - int wanted; dout(10, "trim_rdcaps for mds%d\n", session->s_mds); + spin_lock(&session->s_rdcaps_lock); list_for_each_safe(p, n, &session->s_rdcaps) { int last_cap = 0; cap = list_entry(p, struct ceph_cap, session_rdcaps); + spin_unlock(&session->s_rdcaps_lock); inode = &cap->ci->vfs_inode; spin_lock(&inode->i_lock); @@ -1801,22 +1847,51 @@ void ceph_trim_session_rdcaps(struct ceph_mds_session *session) dout(20, " stopping at %p cap %p expires %lu > %lu\n", inode, cap, cap->expires, jiffies); spin_unlock(&inode->i_lock); - break; + } else { + WARN_ON(__ceph_caps_wanted(cap->ci)); + dout(20, " dropping %p cap %p %s\n", inode, cap, + ceph_cap_string(cap->issued)); + last_cap = __ceph_remove_cap(cap); + spin_unlock(&inode->i_lock); + if (last_cap) + iput(inode); } - /* wanted? */ - wanted = __ceph_caps_wanted(cap->ci); - if ((ceph_inode(inode)->i_dirty_caps & cap->issued) == 0 && - wanted == 0 && cap->flushing == 0) { - dout(20, " dropping %p cap %p\n", inode, cap); - last_cap = __ceph_remove_cap(cap); - } else { - dout(20, " keeping %p cap %p (wanted %s flushing %s)\n", - inode, cap, ceph_cap_string(wanted), - ceph_cap_string(cap->flushing)); + spin_lock(&session->s_rdcaps_lock); + } + spin_unlock(&session->s_rdcaps_lock); +} + +/* + * Drop open file reference. If we were the last open file, + * we may need to release capabilities to the MDS (or schedule + * their delayed release). 
+ */ +void ceph_put_fmode(struct ceph_inode_info *ci, int fmode) +{ + struct inode *inode = &ci->vfs_inode; + int last = 0; + + spin_lock(&inode->i_lock); + dout(20, "put_fmode %p fmode %d %d -> %d\n", inode, fmode, + ci->i_nr_by_mode[fmode], ci->i_nr_by_mode[fmode]-1); + BUG_ON(ci->i_nr_by_mode[fmode] == 0); + if (--ci->i_nr_by_mode[fmode] == 0) { + last++; + + /* maybe turn caps into rdcaps? */ + if (__ceph_caps_wanted(ci) == 0) { + struct rb_node *p; + struct ceph_cap *cap; + + for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) { + cap = rb_entry(p, struct ceph_cap, ci_node); + __adjust_cap_rdcaps_listing(ci, cap, 0); + } } - spin_unlock(&inode->i_lock); - if (last_cap) - iput(inode); } + spin_unlock(&inode->i_lock); + + if (last && ci->i_vino.snap == CEPH_NOSNAP) + ceph_check_caps(ci, 0, 0, NULL); } diff --git a/src/kernel/inode.c b/src/kernel/inode.c index 3e5c08e2ea1bb..c4af7e3873db2 100644 --- a/src/kernel/inode.c +++ b/src/kernel/inode.c @@ -1288,28 +1288,6 @@ void ceph_inode_set_size(struct inode *inode, loff_t size) } } -/* - * Drop open file reference. If we were the last open file, - * we may need to release capabilities to the MDS (or schedule - * their delayed release). - */ -void ceph_put_fmode(struct ceph_inode_info *ci, int fmode) -{ - int last = 0; - - spin_lock(&ci->vfs_inode.i_lock); - dout(20, "put_mode %p fmode %d %d -> %d\n", &ci->vfs_inode, fmode, - ci->i_nr_by_mode[fmode], ci->i_nr_by_mode[fmode]-1); - BUG_ON(ci->i_nr_by_mode[fmode] == 0); - if (--ci->i_nr_by_mode[fmode] == 0) - last++; - spin_unlock(&ci->vfs_inode.i_lock); - - if (last && ci->i_vino.snap == CEPH_NOSNAP) - ceph_check_caps(ci, 0, 0, NULL); -} - - /* * Write back inode data in a worker thread. (This can't be done * in the message handler context.) 
diff --git a/src/kernel/mds_client.c b/src/kernel/mds_client.c index 050d72ada42b9..552b1d64c3a7b 100644 --- a/src/kernel/mds_client.c +++ b/src/kernel/mds_client.c @@ -335,6 +335,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, s->s_renew_requested = 0; INIT_LIST_HEAD(&s->s_caps); INIT_LIST_HEAD(&s->s_rdcaps); + spin_lock_init(&s->s_rdcaps_lock); s->s_nr_caps = 0; atomic_set(&s->s_ref, 1); INIT_LIST_HEAD(&s->s_waiting); diff --git a/src/kernel/mds_client.h b/src/kernel/mds_client.h index 7b07a63102f8e..a250872e5ba21 100644 --- a/src/kernel/mds_client.h +++ b/src/kernel/mds_client.h @@ -118,6 +118,7 @@ struct ceph_mds_session { unsigned long s_renew_requested; /* last time we sent a renew req */ struct list_head s_caps; /* all caps issued by this session */ struct list_head s_rdcaps; /* just the readonly caps */ + spinlock_t s_rdcaps_lock; int s_nr_caps; atomic_t s_ref; struct list_head s_waiting; /* waiting requests */ diff --git a/src/kernel/super.h b/src/kernel/super.h index b9f3e0678fcab..a14c73e5ec6cc 100644 --- a/src/kernel/super.h +++ b/src/kernel/super.h @@ -484,13 +484,6 @@ static inline int __ceph_caps_wanted(struct ceph_inode_info *ci) return w; } -/* for counting open files by mode */ -static inline void __ceph_get_fmode(struct ceph_inode_info *ci, int mode) -{ - ci->i_nr_by_mode[mode]++; -} -extern void ceph_put_fmode(struct ceph_inode_info *ci, int mode); - static inline struct ceph_client *ceph_inode_to_client(struct inode *inode) { return (struct ceph_client *)inode->i_sb->s_fs_info; @@ -739,6 +732,13 @@ static inline void ceph_release_caps(struct inode *inode, int mask) ceph_check_caps(ceph_inode(inode), 1, mask, NULL); } +/* for counting open files by mode */ +static inline void __ceph_get_fmode(struct ceph_inode_info *ci, int mode) +{ + ci->i_nr_by_mode[mode]++; +} +extern void ceph_put_fmode(struct ceph_inode_info *ci, int mode); + /* addr.c */ extern const struct address_space_operations ceph_aops; extern int 
ceph_mmap(struct file *file, struct vm_area_struct *vma); -- 2.39.5