git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
kclient: put delayed caps on single queue, use existing mdsc delayed work handler
author Sage Weil <sage@newdream.net>
Thu, 1 May 2008 14:13:47 +0000 (07:13 -0700)
committer Sage Weil <sage@newdream.net>
Thu, 1 May 2008 14:13:47 +0000 (07:13 -0700)
src/TODO
src/kernel/inode.c
src/kernel/mds_client.c
src/kernel/mds_client.h
src/kernel/super.c
src/kernel/super.h

index befdcf7d4e8e5afb8cd6139687dfca2900e26080..c518a9a093f352533edc9705d38d25eac950bc6a 100644 (file)
--- a/src/TODO
+++ b/src/TODO
@@ -20,8 +20,7 @@ userspace client
 
 kernel client
 - flush caps on sync, fsync, etc.
-  - hmm, should these go in a per-session "check" list, instead of independently scheduling delayed_work for each inode?
-- fsync should flush cap file size to mds.  not sure how to block on that, however.. maybe a want_reply flag in the cap msg?
+  - do we need to block?
 - timeout mds session close on umount
 - file_data_version stuff!
 - deal with CAP_RDCACHE properly: invalidate cache pages?
index d620c044dfaee201e69da4b0234c6e04f63b570c..91a2c7b6e42b9fe5d297ec38d0f4a64a5fa150f8 100644 (file)
--- a/src/kernel/inode.c
+++ b/src/kernel/inode.c
@@ -746,38 +746,33 @@ void __ceph_remove_cap(struct ceph_inode_cap *cap)
 void ceph_remove_cap(struct ceph_inode_cap *cap)
 {
        struct inode *inode = &cap->ci->vfs_inode;
-       struct ceph_inode_info *ci = ceph_inode(inode);
-       int was_last;
 
        spin_lock(&inode->i_lock);
        __ceph_remove_cap(cap);
-       was_last = list_empty(&ci->i_caps);
        spin_unlock(&inode->i_lock);
-       if (was_last)
-               cancel_delayed_work_sync(&ci->i_cap_dwork);
        iput(inode);
 }
 
-void ceph_cap_delayed_work(struct work_struct *work)
+/*
+ * caller holds i_lock
+ *    -> client->cap_delay_lock
+ */
+void __ceph_cap_delay_requeue(struct ceph_mds_client *mdsc,
+                             struct ceph_inode_info *ci)
 {
-       struct ceph_inode_info *ci = container_of(work,
-                                                 struct ceph_inode_info,
-                                                 i_cap_dwork.work);
-       spin_lock(&ci->vfs_inode.i_lock);
-       if (ci->i_hold_caps_until &&
-           time_before(jiffies, ci->i_hold_caps_until)) {
-               dout(10, "cap_dwork on %p -- rescheduling\n", &ci->vfs_inode);
-               schedule_delayed_work(&ci->i_cap_dwork, 
-                                     time_sub(ci->i_hold_caps_until, jiffies));
-               spin_unlock(&ci->vfs_inode.i_lock);
-       } else {
-               dout(10, "cap_dwork on %p\n", &ci->vfs_inode);
-               spin_unlock(&ci->vfs_inode.i_lock);
-               ceph_check_caps(ci, 1);
-       }
-       dout(10, "cap_dwork on %p done\n", &ci->vfs_inode);
+       ci->i_hold_caps_until = round_jiffies(jiffies + HZ * 5);
+       dout(10, "__cap_delay_requeue %p at %lu\n", &ci->vfs_inode,
+            ci->i_hold_caps_until);
+       spin_lock(&mdsc->cap_delay_lock);
+       if (list_empty(&ci->i_cap_delay_list))
+               igrab(&ci->vfs_inode);
+       else
+               list_del_init(&ci->i_cap_delay_list);
+       list_add_tail(&ci->i_cap_delay_list, &mdsc->cap_delay_list);
+       spin_unlock(&mdsc->cap_delay_lock);
 }
 
+
 /*
  * examine currently used, wanted versus held caps.
  *  release, ack revoked caps to mds as appropriate.
@@ -801,16 +796,8 @@ retry:
        dout(10, "check_caps %p wanted %d used %d issued %d\n", inode,
             wanted, used, __ceph_caps_issued(ci));
 
-       if (!is_delayed) {
-               unsigned long until = round_jiffies(jiffies + HZ * 5);
-               if (time_after(until, ci->i_hold_caps_until)) {
-                       ci->i_hold_caps_until = until;
-                       dout(10, "hold_caps_until %lu\n", until);
-                       cancel_delayed_work(&ci->i_cap_dwork);
-                       schedule_delayed_work(&ci->i_cap_dwork,
-                                             time_sub(until, jiffies));
-               }
-       }
+       if (!is_delayed)
+               __ceph_cap_delay_requeue(mdsc, ci);
 
        list_for_each(p, &ci->i_caps) {
                int revoking;
@@ -865,10 +852,9 @@ ack:
                        }
                }
 
-               /* send_cap drops i_lock AND s_mutex */
+               /* send_cap drops i_lock */
                removed_last = __ceph_mdsc_send_cap(mdsc, session, cap,
                                                    used, wanted, !is_delayed);
-               session = 0;
                if (removed_last)
                        goto out;
                goto retry;
index 91ebeb26561bd026275dc850c932c444f8d0e230..3a0772caed03bbcf19b136644baca8da02bb1bfb 100644 (file)
--- a/src/kernel/mds_client.c
+++ b/src/kernel/mds_client.c
@@ -1471,6 +1471,17 @@ int send_renewcaps(struct ceph_mds_client *mdsc,
        return 0;
 }
 
+void __cap_delay_cancel(struct ceph_mds_client *mdsc,
+                       struct ceph_inode_info *ci)
+{
+       dout(10, "__cap_delay_cancel %p\n", &ci->vfs_inode);
+       if (list_empty(&ci->i_cap_delay_list))
+               return;
+       spin_lock(&mdsc->cap_delay_lock);
+       list_del_init(&ci->i_cap_delay_list);
+       spin_unlock(&mdsc->cap_delay_lock);
+       iput(&ci->vfs_inode);
+}
 
 /*
  * called with i_lock, then drops it.
@@ -1511,6 +1522,8 @@ int __ceph_mdsc_send_cap(struct ceph_mds_client *mdsc,
        if (wanted == 0) {
                __ceph_remove_cap(cap);
                removed_last = list_empty(&ci->i_caps);
+               if (removed_last && cancel_work)
+                       __cap_delay_cancel(mdsc, ci);
        }
        spin_unlock(&inode->i_lock);
 
@@ -1523,17 +1536,37 @@ int __ceph_mdsc_send_cap(struct ceph_mds_client *mdsc,
        send_cap_ack(mdsc, ceph_ino(inode),
                     keep, wanted, seq,
                     size, max_size, &mtime, &atime, session->s_mds);
-       
-       mutex_unlock(&session->s_mutex);
 
-       if (wanted == 0) {
-               if (removed_last && cancel_work)
-                       cancel_delayed_work_sync(&ci->i_cap_dwork);
+       if (wanted == 0)
                iput(inode);  /* removed cap */
-       }
+
        return removed_last;
 }
 
+static void check_delayed_caps(struct ceph_mds_client *mdsc)
+{
+       struct ceph_inode_info *ci;
+
+       dout(10, "check_delayed_caps\n");
+       while (1) {
+               spin_lock(&mdsc->cap_delay_lock);
+               if (list_empty(&mdsc->cap_delay_list))
+                       goto out_unlock;
+               ci = list_first_entry(&mdsc->cap_delay_list,
+                                     struct ceph_inode_info,
+                                     i_cap_delay_list);
+               if (time_before(jiffies, ci->i_hold_caps_until))
+                       break;
+               list_del_init(&ci->i_cap_delay_list);
+               spin_unlock(&mdsc->cap_delay_lock);
+               dout(10, "check_delayed_caps on %p\n", &ci->vfs_inode);
+               ceph_check_caps(ci, 1);
+               iput(&ci->vfs_inode);
+       }
+
+out_unlock:
+       spin_unlock(&mdsc->cap_delay_lock);
+}
 
 static void flush_write_caps(struct ceph_mds_client *mdsc,
                             struct ceph_mds_session *session)
@@ -1553,8 +1586,8 @@ static void flush_write_caps(struct ceph_mds_client *mdsc,
                }
                used = __ceph_caps_used(cap->ci);
                wanted = __ceph_caps_wanted(cap->ci);
-               /* FIXME: this drops s_mutex, which we dont want, ugh */
-               __ceph_mdsc_send_cap(mdsc, session, cap, used, wanted, 0);
+
+               __ceph_mdsc_send_cap(mdsc, session, cap, used, wanted, 1);
        }
 }
 
@@ -1765,10 +1798,12 @@ void delayed_work(struct work_struct *work)
 
        dout(10, "delayed_work on %p renew_caps=%d\n", mdsc, renew_caps);
 
-       /* renew caps */
        spin_lock(&mdsc->lock);
        if (renew_caps)
                mdsc->last_renew_caps = jiffies;
+
+       check_delayed_caps(mdsc);
+
        for (i = 0; i < mdsc->max_sessions; i++) {
                struct ceph_mds_session *session = __get_session(mdsc, i);
                if (session == 0)
@@ -1807,6 +1842,8 @@ void ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client)
        init_completion(&mdsc->session_close_waiters);
        INIT_DELAYED_WORK(&mdsc->delayed_work, delayed_work);
        mdsc->last_renew_caps = jiffies;
+       INIT_LIST_HEAD(&mdsc->cap_delay_list);
+       spin_lock_init(&mdsc->cap_delay_lock);
 }
 
 void ceph_mdsc_stop(struct ceph_mds_client *mdsc)
index 9edc00755624146abdb189e702a748830f4ba76a..4957362c2055d60f391ded05d3aff642537d40f7 100644 (file)
--- a/src/kernel/mds_client.h
+++ b/src/kernel/mds_client.h
@@ -106,6 +106,8 @@ struct ceph_mds_client {
        struct completion       map_waiters, session_close_waiters;
        struct delayed_work     delayed_work;  /* delayed work */
        unsigned long last_renew_caps;
+       struct list_head cap_delay_list;
+       spinlock_t cap_delay_lock;
 };
 
 extern const char *ceph_mds_op_name(int op);
index 1469cfffcd0607ca3bdbe781adce2c9254bc17df..0c57e6b19473aa9dc926d93c3fe16b1a75c92419 100644 (file)
--- a/src/kernel/super.c
+++ b/src/kernel/super.c
@@ -174,11 +174,11 @@ static struct inode *ceph_alloc_inode(struct super_block *sb)
        ci->i_rd_ref = ci->i_rdcache_ref = 0;
        ci->i_wr_ref = ci->i_wrbuffer_ref = 0;
        ci->i_hold_caps_until = 0;
+       INIT_LIST_HEAD(&ci->i_cap_delay_list);
 
        ci->i_hashval = 0;
 
        INIT_WORK(&ci->i_wb_work, ceph_inode_writeback);
-       INIT_DELAYED_WORK(&ci->i_cap_dwork, ceph_cap_delayed_work);
 
        return &ci->vfs_inode;
 }
@@ -548,6 +548,7 @@ struct ceph_client *ceph_create_client(struct ceph_mount_args *args,
 
        init_waitqueue_head(&cl->mount_wq);
        spin_lock_init(&cl->sb_lock);
+
        get_client_counter();
 
        cl->wb_wq = create_workqueue("ceph-writeback");
index 29d396f51fb75aeb1d29719897c22385311dc9b3..701fd201ccb3ef6d899bf37b562b11d73d7e7535 100644 (file)
--- a/src/kernel/super.h
+++ b/src/kernel/super.h
@@ -181,6 +181,7 @@ struct ceph_inode_info {
        struct ceph_inode_cap i_static_caps[STATIC_CAPS];
        wait_queue_head_t i_cap_wq;
        unsigned long i_hold_caps_until; /* jiffies */
+       struct list_head i_cap_delay_list;
 
        int i_nr_by_mode[CEPH_FILE_MODE_NUM];
        loff_t i_max_size;      /* size authorized by mds */
@@ -195,7 +196,6 @@ struct ceph_inode_info {
        unsigned long i_hashval;
 
        struct work_struct i_wb_work;  /* writeback work */
-       struct delayed_work i_cap_dwork;  /* cap work */
 
        struct inode vfs_inode; /* at end */
 };
@@ -385,7 +385,6 @@ extern int ceph_get_cap_refs(struct ceph_inode_info *ci, int need, int want, int
 extern void ceph_take_cap_refs(struct ceph_inode_info *ci, int got);
 extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had);
 extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr);
-extern void ceph_cap_delayed_work(struct work_struct *work);
 extern void ceph_check_caps(struct ceph_inode_info *ci, int is_delayed);
 extern void ceph_inode_set_size(struct inode *inode, loff_t size);
 extern void ceph_inode_writeback(struct work_struct *work);