kernel client
- flush caps on sync, fsync, etc.
- - hmm, should these go in a per-session "check" list, instead of independently scheduling delayed_work for each inode?
-- fsync should flush cap file size to mds. not sure how to block on that, however.. maybe a want_reply flag in the cap msg?
+ - do we need to block?  (rough sketch below)
- timeout mds session close on umount
- file_data_version stuff!
- deal with CAP_RDCACHE properly: invalidate cache pages?
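For the CAP_RDCACHE item: a rough sketch of what dropping cached pages on revoke might look like. The helper is hypothetical (not part of this patch); note that invalidate_mapping_pages() only drops clean, unlocked pages, so any dirty data would still need writeback before the revoke could be acked.

static void ceph_invalidate_rdcache(struct inode *inode)
{
	/* drop clean cached pages; dirty and locked pages are skipped */
	dout(10, "invalidate_rdcache %p\n", inode);
	invalidate_mapping_pages(inode->i_mapping, 0, -1);
}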
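For the fsync question above: one possible shape of the want_reply idea, assuming a hypothetical per-inode completion (i_cap_flush_done here) that the cap-ack handler would complete when the mds confirms the flush. None of these names exist yet; this is only a sketch of the blocking variant.

static int ceph_fsync_flush_caps(struct inode *inode)
{
	struct ceph_inode_info *ci = ceph_inode(inode);

	/* push dirty cap state (size, mtime) to the mds now */
	ceph_check_caps(ci, 1);

	/* hypothetical: block until the mds acks the flush */
	wait_for_completion(&ci->i_cap_flush_done);
	return 0;
}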
void ceph_remove_cap(struct ceph_inode_cap *cap)
{
struct inode *inode = &cap->ci->vfs_inode;
- struct ceph_inode_info *ci = ceph_inode(inode);
- int was_last;
spin_lock(&inode->i_lock);
__ceph_remove_cap(cap);
- was_last = list_empty(&ci->i_caps);
spin_unlock(&inode->i_lock);
- if (was_last)
- cancel_delayed_work_sync(&ci->i_cap_dwork);
iput(inode);
}
-void ceph_cap_delayed_work(struct work_struct *work)
+/*
+ * Caller holds i_lock; we take mdsc->cap_delay_lock inside it, so the
+ * lock ordering is i_lock -> cap_delay_lock.
+ */
+void __ceph_cap_delay_requeue(struct ceph_mds_client *mdsc,
+ struct ceph_inode_info *ci)
{
- struct ceph_inode_info *ci = container_of(work,
- struct ceph_inode_info,
- i_cap_dwork.work);
- spin_lock(&ci->vfs_inode.i_lock);
- if (ci->i_hold_caps_until &&
- time_before(jiffies, ci->i_hold_caps_until)) {
- dout(10, "cap_dwork on %p -- rescheduling\n", &ci->vfs_inode);
- schedule_delayed_work(&ci->i_cap_dwork,
- time_sub(ci->i_hold_caps_until, jiffies));
- spin_unlock(&ci->vfs_inode.i_lock);
- } else {
- dout(10, "cap_dwork on %p\n", &ci->vfs_inode);
- spin_unlock(&ci->vfs_inode.i_lock);
- ceph_check_caps(ci, 1);
- }
- dout(10, "cap_dwork on %p done\n", &ci->vfs_inode);
+ ci->i_hold_caps_until = round_jiffies(jiffies + HZ * 5);
+ dout(10, "__cap_delay_requeue %p at %lu\n", &ci->vfs_inode,
+ ci->i_hold_caps_until);
+ spin_lock(&mdsc->cap_delay_lock);
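+ /* not already queued: take an inode ref; otherwise just move it to the tail */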
+ if (list_empty(&ci->i_cap_delay_list))
+ igrab(&ci->vfs_inode);
+ else
+ list_del_init(&ci->i_cap_delay_list);
+ list_add_tail(&ci->i_cap_delay_list, &mdsc->cap_delay_list);
+ spin_unlock(&mdsc->cap_delay_lock);
}
+
/*
* examine currently used, wanted versus held caps.
* release, ack revoked caps to mds as appropriate.
dout(10, "check_caps %p wanted %d used %d issued %d\n", inode,
wanted, used, __ceph_caps_issued(ci));
- if (!is_delayed) {
- unsigned long until = round_jiffies(jiffies + HZ * 5);
- if (time_after(until, ci->i_hold_caps_until)) {
- ci->i_hold_caps_until = until;
- dout(10, "hold_caps_until %lu\n", until);
- cancel_delayed_work(&ci->i_cap_dwork);
- schedule_delayed_work(&ci->i_cap_dwork,
- time_sub(until, jiffies));
- }
- }
+ if (!is_delayed)
+ __ceph_cap_delay_requeue(mdsc, ci);
list_for_each(p, &ci->i_caps) {
int revoking;
}
}
- /* send_cap drops i_lock AND s_mutex */
+ /* send_cap drops i_lock */
removed_last = __ceph_mdsc_send_cap(mdsc, session, cap,
used, wanted, !is_delayed);
- session = 0;
if (removed_last)
goto out;
goto retry;
return 0;
}
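+/*
+ * Cancel any pending delayed cap check for this inode and drop the
+ * inode reference taken when it was queued.
+ */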
+void __cap_delay_cancel(struct ceph_mds_client *mdsc,
+ struct ceph_inode_info *ci)
+{
+ dout(10, "__cap_delay_cancel %p\n", &ci->vfs_inode);
+ if (list_empty(&ci->i_cap_delay_list))
+ return;
+ spin_lock(&mdsc->cap_delay_lock);
+ list_del_init(&ci->i_cap_delay_list);
+ spin_unlock(&mdsc->cap_delay_lock);
+ iput(&ci->vfs_inode);
+}
/*
* called with i_lock, then drops it.
if (wanted == 0) {
__ceph_remove_cap(cap);
removed_last = list_empty(&ci->i_caps);
+ if (removed_last && cancel_work)
+ __cap_delay_cancel(mdsc, ci);
}
spin_unlock(&inode->i_lock);
send_cap_ack(mdsc, ceph_ino(inode),
keep, wanted, seq,
size, max_size, &mtime, &atime, session->s_mds);
-
- mutex_unlock(&session->s_mutex);
- if (wanted == 0) {
- if (removed_last && cancel_work)
- cancel_delayed_work_sync(&ci->i_cap_dwork);
+ if (wanted == 0)
iput(inode); /* removed cap */
- }
+
return removed_last;
}
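+/*
+ * Run a (delayed) cap check on each queued inode whose hold period has expired.
+ */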
+static void check_delayed_caps(struct ceph_mds_client *mdsc)
+{
+ struct ceph_inode_info *ci;
+
+ dout(10, "check_delayed_caps\n");
+ while (1) {
+ spin_lock(&mdsc->cap_delay_lock);
+ if (list_empty(&mdsc->cap_delay_list))
+ goto out_unlock;
+ ci = list_first_entry(&mdsc->cap_delay_list,
+ struct ceph_inode_info,
+ i_cap_delay_list);
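+ /* list is in hold_caps_until order; stop at the first entry not yet due */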
+ if (time_before(jiffies, ci->i_hold_caps_until))
+ break;
+ list_del_init(&ci->i_cap_delay_list);
+ spin_unlock(&mdsc->cap_delay_lock);
+ dout(10, "check_delayed_caps on %p\n", &ci->vfs_inode);
+ ceph_check_caps(ci, 1);
+ iput(&ci->vfs_inode);
+ }
+
+out_unlock:
+ spin_unlock(&mdsc->cap_delay_lock);
+}
static void flush_write_caps(struct ceph_mds_client *mdsc,
struct ceph_mds_session *session)
}
used = __ceph_caps_used(cap->ci);
wanted = __ceph_caps_wanted(cap->ci);
- /* FIXME: this drops s_mutex, which we dont want, ugh */
- __ceph_mdsc_send_cap(mdsc, session, cap, used, wanted, 0);
+
+ __ceph_mdsc_send_cap(mdsc, session, cap, used, wanted, 1);
}
}
dout(10, "delayed_work on %p renew_caps=%d\n", mdsc, renew_caps);
- /* renew caps */
+ /* do this before taking mdsc->lock: checking caps may send
+ * messages to the mds and call iput(), both of which can block */
+ check_delayed_caps(mdsc);
+
spin_lock(&mdsc->lock);
if (renew_caps)
mdsc->last_renew_caps = jiffies;
for (i = 0; i < mdsc->max_sessions; i++) {
struct ceph_mds_session *session = __get_session(mdsc, i);
if (session == 0)
init_completion(&mdsc->session_close_waiters);
INIT_DELAYED_WORK(&mdsc->delayed_work, delayed_work);
mdsc->last_renew_caps = jiffies;
+ INIT_LIST_HEAD(&mdsc->cap_delay_list);
+ spin_lock_init(&mdsc->cap_delay_lock);
}
void ceph_mdsc_stop(struct ceph_mds_client *mdsc)
struct completion map_waiters, session_close_waiters;
struct delayed_work delayed_work; /* delayed work */
unsigned long last_renew_caps;
+ struct list_head cap_delay_list;   /* inodes with a delayed cap check pending */
+ spinlock_t cap_delay_lock;         /* protects cap_delay_list */
};
extern const char *ceph_mds_op_name(int op);
ci->i_rd_ref = ci->i_rdcache_ref = 0;
ci->i_wr_ref = ci->i_wrbuffer_ref = 0;
ci->i_hold_caps_until = 0;
+ INIT_LIST_HEAD(&ci->i_cap_delay_list);
ci->i_hashval = 0;
INIT_WORK(&ci->i_wb_work, ceph_inode_writeback);
- INIT_DELAYED_WORK(&ci->i_cap_dwork, ceph_cap_delayed_work);
return &ci->vfs_inode;
}
init_waitqueue_head(&cl->mount_wq);
spin_lock_init(&cl->sb_lock);
+
get_client_counter();
cl->wb_wq = create_workqueue("ceph-writeback");
struct ceph_inode_cap i_static_caps[STATIC_CAPS];
wait_queue_head_t i_cap_wq;
unsigned long i_hold_caps_until; /* jiffies */
+ struct list_head i_cap_delay_list;  /* link on mdsc->cap_delay_list */
int i_nr_by_mode[CEPH_FILE_MODE_NUM];
loff_t i_max_size; /* size authorized by mds */
unsigned long i_hashval;
struct work_struct i_wb_work; /* writeback work */
- struct delayed_work i_cap_dwork; /* cap work */
struct inode vfs_inode; /* at end */
};
extern void ceph_take_cap_refs(struct ceph_inode_info *ci, int got);
extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had);
extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr);
-extern void ceph_cap_delayed_work(struct work_struct *work);
extern void ceph_check_caps(struct ceph_inode_info *ci, int is_delayed);
extern void ceph_inode_set_size(struct inode *inode, loff_t size);
extern void ceph_inode_writeback(struct work_struct *work);