From 4a732cb7a700f06d54c9918fa3f340b386200508 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 1 May 2008 07:13:47 -0700 Subject: [PATCH] kclient: put delayed caps on single queue, use existing mdsc delayed work handler --- src/TODO | 3 +-- src/kernel/inode.c | 54 +++++++++++++++------------------------- src/kernel/mds_client.c | 55 ++++++++++++++++++++++++++++++++++------- src/kernel/mds_client.h | 2 ++ src/kernel/super.c | 3 ++- src/kernel/super.h | 3 +-- 6 files changed, 72 insertions(+), 48 deletions(-) diff --git a/src/TODO b/src/TODO index befdcf7d4e8e..c518a9a093f3 100644 --- a/src/TODO +++ b/src/TODO @@ -20,8 +20,7 @@ userspace client kernel client - flush caps on sync, fsync, etc. - - hmm, should these go in a per-session "check" list, instead of independently scheduling delayed_work for each inode? -- fsync should flush cap file size to mds. not sure how to block on that, however.. maybe a want_reply flag in the cap msg? + - do we need to block? - timeout mds session close on umount - file_data_version stuff! - deal with CAP_RDCACHE properly: invalidate cache pages? diff --git a/src/kernel/inode.c b/src/kernel/inode.c index d620c044dfae..91a2c7b6e42b 100644 --- a/src/kernel/inode.c +++ b/src/kernel/inode.c @@ -746,38 +746,33 @@ void __ceph_remove_cap(struct ceph_inode_cap *cap) void ceph_remove_cap(struct ceph_inode_cap *cap) { struct inode *inode = &cap->ci->vfs_inode; - struct ceph_inode_info *ci = ceph_inode(inode); - int was_last; spin_lock(&inode->i_lock); __ceph_remove_cap(cap); - was_last = list_empty(&ci->i_caps); spin_unlock(&inode->i_lock); - if (was_last) - cancel_delayed_work_sync(&ci->i_cap_dwork); iput(inode); } -void ceph_cap_delayed_work(struct work_struct *work) +/* + * caller holds i_lock + * -> client->cap_delay_lock + */ +void __ceph_cap_delay_requeue(struct ceph_mds_client *mdsc, + struct ceph_inode_info *ci) { - struct ceph_inode_info *ci = container_of(work, - struct ceph_inode_info, - i_cap_dwork.work); - spin_lock(&ci->vfs_inode.i_lock); - if (ci->i_hold_caps_until && - time_before(jiffies, ci->i_hold_caps_until)) { - dout(10, "cap_dwork on %p -- rescheduling\n", &ci->vfs_inode); - schedule_delayed_work(&ci->i_cap_dwork, - time_sub(ci->i_hold_caps_until, jiffies)); - spin_unlock(&ci->vfs_inode.i_lock); - } else { - dout(10, "cap_dwork on %p\n", &ci->vfs_inode); - spin_unlock(&ci->vfs_inode.i_lock); - ceph_check_caps(ci, 1); - } - dout(10, "cap_dwork on %p done\n", &ci->vfs_inode); + ci->i_hold_caps_until = round_jiffies(jiffies + HZ * 5); + dout(10, "__cap_delay_requeue %p at %lu\n", &ci->vfs_inode, + ci->i_hold_caps_until); + spin_lock(&mdsc->cap_delay_lock); + if (list_empty(&ci->i_cap_delay_list)) + igrab(&ci->vfs_inode); + else + list_del_init(&ci->i_cap_delay_list); + list_add_tail(&ci->i_cap_delay_list, &mdsc->cap_delay_list); + spin_unlock(&mdsc->cap_delay_lock); } + /* * examine currently used, wanted versus held caps. * release, ack revoked caps to mds as appropriate. @@ -801,16 +796,8 @@ retry: dout(10, "check_caps %p wanted %d used %d issued %d\n", inode, wanted, used, __ceph_caps_issued(ci)); - if (!is_delayed) { - unsigned long until = round_jiffies(jiffies + HZ * 5); - if (time_after(until, ci->i_hold_caps_until)) { - ci->i_hold_caps_until = until; - dout(10, "hold_caps_until %lu\n", until); - cancel_delayed_work(&ci->i_cap_dwork); - schedule_delayed_work(&ci->i_cap_dwork, - time_sub(until, jiffies)); - } - } + if (!is_delayed) + __ceph_cap_delay_requeue(mdsc, ci); list_for_each(p, &ci->i_caps) { int revoking; @@ -865,10 +852,9 @@ ack: } } - /* send_cap drops i_lock AND s_mutex */ + /* send_cap drops i_lock */ removed_last = __ceph_mdsc_send_cap(mdsc, session, cap, used, wanted, !is_delayed); - session = 0; if (removed_last) goto out; goto retry; diff --git a/src/kernel/mds_client.c b/src/kernel/mds_client.c index 91ebeb26561b..3a0772caed03 100644 --- a/src/kernel/mds_client.c +++ b/src/kernel/mds_client.c @@ -1471,6 +1471,17 @@ int send_renewcaps(struct ceph_mds_client *mdsc, return 0; } +void __cap_delay_cancel(struct ceph_mds_client *mdsc, + struct ceph_inode_info *ci) +{ + dout(10, "__cap_delay_cancel %p\n", &ci->vfs_inode); + if (list_empty(&ci->i_cap_delay_list)) + return; + spin_lock(&mdsc->cap_delay_lock); + list_del_init(&ci->i_cap_delay_list); + spin_unlock(&mdsc->cap_delay_lock); + iput(&ci->vfs_inode); +} /* * called with i_lock, then drops it. @@ -1511,6 +1522,8 @@ int __ceph_mdsc_send_cap(struct ceph_mds_client *mdsc, if (wanted == 0) { __ceph_remove_cap(cap); removed_last = list_empty(&ci->i_caps); + if (removed_last && cancel_work) + __cap_delay_cancel(mdsc, ci); } spin_unlock(&inode->i_lock); @@ -1523,17 +1536,37 @@ int __ceph_mdsc_send_cap(struct ceph_mds_client *mdsc, send_cap_ack(mdsc, ceph_ino(inode), keep, wanted, seq, size, max_size, &mtime, &atime, session->s_mds); - - mutex_unlock(&session->s_mutex); - if (wanted == 0) { - if (removed_last && cancel_work) - cancel_delayed_work_sync(&ci->i_cap_dwork); + if (wanted == 0) iput(inode); /* removed cap */ - } + return removed_last; } +static void check_delayed_caps(struct ceph_mds_client *mdsc) +{ + struct ceph_inode_info *ci; + + dout(10, "check_delayed_caps\n"); + while (1) { + spin_lock(&mdsc->cap_delay_lock); + if (list_empty(&mdsc->cap_delay_list)) + goto out_unlock; + ci = list_first_entry(&mdsc->cap_delay_list, + struct ceph_inode_info, + i_cap_delay_list); + if (time_before(jiffies, ci->i_hold_caps_until)) + break; + list_del_init(&ci->i_cap_delay_list); + spin_unlock(&mdsc->cap_delay_lock); + dout(10, "check_delayed_caps on %p\n", &ci->vfs_inode); + ceph_check_caps(ci, 1); + iput(&ci->vfs_inode); + } + +out_unlock: + spin_unlock(&mdsc->cap_delay_lock); +} static void flush_write_caps(struct ceph_mds_client *mdsc, struct ceph_mds_session *session) @@ -1553,8 +1586,8 @@ static void flush_write_caps(struct ceph_mds_client *mdsc, } used = __ceph_caps_used(cap->ci); wanted = __ceph_caps_wanted(cap->ci); - /* FIXME: this drops s_mutex, which we dont want, ugh */ - __ceph_mdsc_send_cap(mdsc, session, cap, used, wanted, 0); + + __ceph_mdsc_send_cap(mdsc, session, cap, used, wanted, 1); } } @@ -1765,10 +1798,12 @@ void delayed_work(struct work_struct *work) dout(10, "delayed_work on %p renew_caps=%d\n", mdsc, renew_caps); - /* renew caps */ spin_lock(&mdsc->lock); if (renew_caps) mdsc->last_renew_caps = jiffies; + + check_delayed_caps(mdsc); + for (i = 0; i < mdsc->max_sessions; i++) { struct ceph_mds_session *session = __get_session(mdsc, i); if (session == 0) @@ -1807,6 +1842,8 @@ void ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client) init_completion(&mdsc->session_close_waiters); INIT_DELAYED_WORK(&mdsc->delayed_work, delayed_work); mdsc->last_renew_caps = jiffies; + INIT_LIST_HEAD(&mdsc->cap_delay_list); + spin_lock_init(&mdsc->cap_delay_lock); } void ceph_mdsc_stop(struct ceph_mds_client *mdsc) diff --git a/src/kernel/mds_client.h b/src/kernel/mds_client.h index 9edc00755624..4957362c2055 100644 --- a/src/kernel/mds_client.h +++ b/src/kernel/mds_client.h @@ -106,6 +106,8 @@ struct ceph_mds_client { struct completion map_waiters, session_close_waiters; struct delayed_work delayed_work; /* delayed work */ unsigned long last_renew_caps; + struct list_head cap_delay_list; + spinlock_t cap_delay_lock; }; extern const char *ceph_mds_op_name(int op); diff --git a/src/kernel/super.c b/src/kernel/super.c index 1469cfffcd06..0c57e6b19473 100644 --- a/src/kernel/super.c +++ b/src/kernel/super.c @@ -174,11 +174,11 @@ static struct inode *ceph_alloc_inode(struct super_block *sb) ci->i_rd_ref = ci->i_rdcache_ref = 0; ci->i_wr_ref = ci->i_wrbuffer_ref = 0; ci->i_hold_caps_until = 0; + INIT_LIST_HEAD(&ci->i_cap_delay_list); ci->i_hashval = 0; INIT_WORK(&ci->i_wb_work, ceph_inode_writeback); - INIT_DELAYED_WORK(&ci->i_cap_dwork, ceph_cap_delayed_work); return &ci->vfs_inode; } @@ -548,6 +548,7 @@ struct ceph_client *ceph_create_client(struct ceph_mount_args *args, init_waitqueue_head(&cl->mount_wq); spin_lock_init(&cl->sb_lock); + get_client_counter(); cl->wb_wq = create_workqueue("ceph-writeback"); diff --git a/src/kernel/super.h b/src/kernel/super.h index 29d396f51fb7..701fd201ccb3 100644 --- a/src/kernel/super.h +++ b/src/kernel/super.h @@ -181,6 +181,7 @@ struct ceph_inode_info { struct ceph_inode_cap i_static_caps[STATIC_CAPS]; wait_queue_head_t i_cap_wq; unsigned long i_hold_caps_until; /* jiffies */ + struct list_head i_cap_delay_list; int i_nr_by_mode[CEPH_FILE_MODE_NUM]; loff_t i_max_size; /* size authorized by mds */ @@ -195,7 +196,6 @@ struct ceph_inode_info { unsigned long i_hashval; struct work_struct i_wb_work; /* writeback work */ - struct delayed_work i_cap_dwork; /* cap work */ struct inode vfs_inode; /* at end */ }; @@ -385,7 +385,6 @@ extern int ceph_get_cap_refs(struct ceph_inode_info *ci, int need, int want, int extern void ceph_take_cap_refs(struct ceph_inode_info *ci, int got); extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had); extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr); -extern void ceph_cap_delayed_work(struct work_struct *work); extern void ceph_check_caps(struct ceph_inode_info *ci, int is_delayed); extern void ceph_inode_set_size(struct inode *inode, loff_t size); extern void ceph_inode_writeback(struct work_struct *work); -- 2.47.3