git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
kclient: only flush caps to auth mds. wait in write_inode, if asked.
author Sage Weil <sage@newdream.net>
Wed, 1 Apr 2009 14:02:14 +0000 (07:02 -0700)
committer Sage Weil <sage@newdream.net>
Wed, 1 Apr 2009 16:06:30 +0000 (09:06 -0700)
src/TODO
src/kernel/caps.c

index bf0749591a64d06daed883420d3b4a37f5d4910b..9d0bfd20744b2c120deb0940fdf72b73b0c87196 100644 (file)
--- a/src/TODO
+++ b/src/TODO
@@ -45,14 +45,11 @@ kclient caps
 /- release on destroy_inode
 /- implement write_inode?
 /- hold a pin_ref for request r_inode or r_locked_dir.
+/- flush dirty caps to auth mds only.  resend on cap import.
+- reflush caps on mds recovery
 - cap (release) reservations
 - size limit on readdir result, partial dirfrag readdir
 - revisit unmount
-- dirty caps 
-  - how to flush caps only to auth mds
-  - redirty inode caps if mds session shuts down?
-  - what if all mds's are down when write_inode is called?  redirty inode then too?
-  - what does an mds do with a cap flush if it is not auth?
 - fix up fill_trace and other comments
 
 
index 3fcccf87ab02a5853f5d5292a88b364481606e47..b5dd750f2df4ef612531e1d3722070cc8926bc93 100644 (file)
--- a/src/kernel/caps.c
+++ b/src/kernel/caps.c
@@ -625,7 +625,7 @@ static void __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
        int held = cap->issued | cap->implemented;
        int revoking = cap->implemented & ~cap->issued;
        int dropping = cap->issued & ~retain;
-       int keep;
+       int keep, flushing;
        u64 seq, mseq, time_warp_seq, follows;
        u64 size, max_size;
        struct timespec mtime, atime;
@@ -633,7 +633,6 @@ static void __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
        mode_t mode;
        uid_t uid;
        gid_t gid;
-       int flushing;
        int mds = cap->session->s_mds;
 
        dout(10, "__send_cap cap %p session %p %s -> %s (revoking %s)\n",
@@ -981,18 +980,20 @@ ack:
                        took_snap_rwsem = 1;
                }
 
+               if (cap == ci->i_auth_cap) {
+                       /* update dirty, flushing bits */
+                       dirty = __ceph_caps_dirty(ci);
+                       cap->flushing |= dirty & cap->implemented;
+                       if (cap->flushing) {
+                               ci->i_dirty_caps &= ~cap->flushing;
+                               dout(10, " flushing %s, dirty_caps now %s\n",
+                                    ceph_cap_string(cap->flushing),
+                                    ceph_cap_string(ci->i_dirty_caps));
+                       }
+               }
+
                mds = cap->mds;  /* remember mds, so we don't repeat */
 
-               /* update dirty, flushing bits */
-               dirty = __ceph_caps_dirty(ci);
-               cap->flushing |= dirty & cap->implemented;
-               if (cap->flushing) {
-                       ci->i_dirty_caps &= ~cap->flushing;
-                       dout(10, " flushing %s, dirty_caps now %s\n",
-                            ceph_cap_string(cap->flushing),
-                            ceph_cap_string(ci->i_dirty_caps));
-               }
-               
                /* __send_cap drops i_lock */
                __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, used, want, retain);
 
@@ -1007,45 +1008,76 @@ ack:
 }
 
 /*
- * Flush any dirty caps back to the mds
+ * Try to flush dirty caps back to the auth mds; returns the dirty bits seen.
  */
-int ceph_write_inode(struct inode *inode, int unused)
+static int try_flush_caps(struct inode *inode, struct ceph_mds_session *session)
 {
        struct ceph_mds_client *mdsc = &ceph_client(inode->i_sb)->mdsc;
        struct ceph_inode_info *ci = ceph_inode(inode);
-       struct rb_node *p;
-       struct ceph_cap *cap;
+       int unlock_session = session ? 0:1;  /* 1 iff caller passed no session; then we drop s_mutex on exit */
        int dirty;
-       int mds = -1;
 
-       dout(10, "write_inode %p\n", inode);
-more:
+retry:
        spin_lock(&inode->i_lock);
        dirty = __ceph_caps_dirty(ci);
-       if (dirty) {
+       if (dirty && ci->i_auth_cap) {  /* nothing is sent unless an auth cap exists */
+               struct ceph_cap *cap = ci->i_auth_cap;
                int used = __ceph_caps_used(ci);
-               int want = __ceph_caps_mds_wanted(ci);
 
-               for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
-                       cap = rb_entry(p, struct ceph_cap, ci_node);
-                       if (mds >= cap->mds)
-                               continue;
-                       mds = cap->mds;
+               if (!session) {  /* no session supplied: lock the auth cap's session, then re-check */
+                       spin_unlock(&inode->i_lock);
+                       session = cap->session;  /* NOTE(review): cap is read after i_lock was dropped - confirm it cannot go away */
+                       mutex_lock(&session->s_mutex);
+                       goto retry;  /* state may have changed while i_lock was released */
+               }
+               BUG_ON(session != cap->session);  /* NOTE(review): assumes the auth cap did not change across retry - confirm */
+               if (cap->session->s_state < CEPH_MDS_SESSION_OPEN)
+                       goto out;  /* session state below OPEN: send nothing, caps stay dirty */
 
-                       cap->flushing |= dirty & cap->implemented;
-                       if (cap->flushing) {
-                               ci->i_dirty_caps &= ~cap->flushing;
-                               dout(10, " flushing %s, dirty_caps now %s\n",
-                                    ceph_cap_string(cap->flushing),
-                                    ceph_cap_string(ci->i_dirty_caps));
-                       }
-                       __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, used, want,
-                                  cap->issued | cap->implemented);
-                       goto more;
+               cap->flushing |= dirty & cap->implemented;  /* dirty bits this cap implements become flushing */
+               if (cap->flushing) {
+                       ci->i_dirty_caps &= ~cap->flushing;  /* flushing bits are no longer counted in i_dirty_caps */
+                       dout(10, " flushing %s, dirty_caps now %s\n",
+                            ceph_cap_string(cap->flushing),
+                            ceph_cap_string(ci->i_dirty_caps));
                }
+               /* __send_cap drops i_lock */
+               __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, used, cap->mds_wanted,
+                          cap->issued | cap->implemented);
+               goto out_unlocked;  /* skip the spin_unlock below: i_lock is already released */
        }
+out:
        spin_unlock(&inode->i_lock);
-       return 0;
+out_unlocked:
+       if (session && unlock_session)  /* only release s_mutex if it was taken in this function */
+               mutex_unlock(&session->s_mutex);
+       return dirty;  /* dirty bits as sampled on the last pass; 0 when nothing was dirty */
+}
+
+static int caps_are_clean(struct inode *inode)
+{
+       int dirty;  /* result of __ceph_caps_dirty(), sampled under i_lock */
+       spin_lock(&inode->i_lock);
+       dirty = __ceph_caps_dirty(ceph_inode(inode));
+       spin_unlock(&inode->i_lock);
+       return !dirty;  /* nonzero when nothing is dirty; wait condition used by ceph_write_inode */
+}
+
+/*
+ * Flush dirty caps to the auth mds and, if @wait is set, wait until clean.
+ */
+int ceph_write_inode(struct inode *inode, int wait)
+{
+       struct ceph_inode_info *ci = ceph_inode(inode);
+       int err = 0;
+       int dirty;
+
+       dout(10, "write_inode %p\n", inode);
+       dirty = try_flush_caps(inode, NULL);  /* NULL: try_flush_caps locks the auth cap's session itself */
+       if (dirty && wait)  /* NOTE(review): also waits when nothing was sent (no auth cap / session not open) - confirm a later flush wakes us */
+               err = wait_event_interruptible(ci->i_cap_wq,
+                                              caps_are_clean(inode));  /* i_cap_wq is woken in handle_cap_flush_ack */
+       return err;  /* 0, or whatever wait_event_interruptible() returned */
+}
 
 
@@ -1528,8 +1560,10 @@ static void handle_cap_flush_ack(struct inode *inode,
        cap->flushing &= ~cleaned;
        new_dirty = __ceph_caps_dirty(ci);
        spin_unlock(&inode->i_lock);
-       if (old_dirty && !new_dirty)
+       if (old_dirty && !new_dirty) {
+               wake_up(&ci->i_cap_wq);
                iput(inode);
+       }
 }
 
 /*
@@ -1700,6 +1734,7 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,
        ceph_add_cap(inode, session, cap_id, -1,
                     issued, wanted, seq, mseq, realmino,
                     ttl_ms, jiffies - ttl_ms/2, CEPH_CAP_FLAG_AUTH, NULL);
+       try_flush_caps(inode, session);
        up_read(&mdsc->snap_rwsem);
 }