From cd4ae79d9092354f03b7f77535a5fc94f9882cd5 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 20 May 2008 15:29:37 -0700 Subject: [PATCH] kclient: add ceph_aio_write (instead of generic) to take cap refs --- src/TODO | 2 ++ src/kernel/file.c | 74 ++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 63 insertions(+), 13 deletions(-) diff --git a/src/TODO b/src/TODO index 080432d3ebb28..f56542fdbe243 100644 --- a/src/TODO +++ b/src/TODO @@ -27,6 +27,8 @@ userspace client - also needs cope with mds failures kernel client +- close WR|WRBUFFER -> WR transition overwrite hole + - get_cap_refs needs to block if want WR+WRBUFFER, issued WR, but still implemented WRBUFFER. - flush caps on sync, fsync, etc. - do we need to block? - timeout mds session close on umount diff --git a/src/kernel/file.c b/src/kernel/file.c index d932ff8f66200..bfa1ba32dfcfd 100644 --- a/src/kernel/file.c +++ b/src/kernel/file.c @@ -286,18 +286,11 @@ out: /* * ditto */ -ssize_t ceph_write(struct file *filp, const char __user *buf, - size_t len, loff_t *ppos) +static void check_max_size(struct inode *inode, loff_t endoff) { - struct inode *inode = filp->f_dentry->d_inode; struct ceph_inode_info *ci = ceph_inode(inode); - ssize_t ret; - int got = 0; int check = 0; - loff_t endoff = *ppos + len; - - __ceph_do_pending_vmtruncate(inode); - + /* do we need to explicitly request a larger max_size? */ spin_lock(&inode->i_lock); if ((endoff >= ci->i_max_size || @@ -311,13 +304,26 @@ ssize_t ceph_write(struct file *filp, const char __user *buf, spin_unlock(&inode->i_lock); if (check) ceph_check_caps(ci, 0); +} + +ssize_t ceph_write(struct file *filp, const char __user *buf, + size_t len, loff_t *ppos) +{ + struct inode *inode = filp->f_dentry->d_inode; + struct ceph_inode_info *ci = ceph_inode(inode); + ssize_t ret; + int got = 0; + loff_t endoff = *ppos + len; + + __ceph_do_pending_vmtruncate(inode); + check_max_size(inode, endoff); dout(10, "write %p %llu~%u getting caps. i_size %llu\n", inode, *ppos, (unsigned)len, inode->i_size); ret = wait_event_interruptible(ci->i_cap_wq, ceph_get_cap_refs(ci, CEPH_CAP_WR, CEPH_CAP_WRBUFFER, - &got, *ppos+len)); + &got, endoff)); if (ret < 0) goto out; dout(10, "write %p %llu~%u got cap refs on %d\n", @@ -336,6 +342,46 @@ out: return ret; } +/* + * verify caps in aio_write as well! + */ +ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file->f_dentry->d_inode; + struct ceph_inode_info *ci = ceph_inode(inode); + loff_t endoff = pos + iov->iov_len; + int got = 0; + int ret; + + check_max_size(inode, endoff); + dout(10, "aio_write %p %llu~%u getting caps. i_size %llu\n", + inode, pos, (unsigned)iov->iov_len, inode->i_size); + ret = wait_event_interruptible(ci->i_cap_wq, + ceph_get_cap_refs(ci, CEPH_CAP_WR, + CEPH_CAP_WRBUFFER, + &got, endoff)); + if (ret < 0) + goto out; + + dout(10, "aio_write %p %llu~%u got cap refs on %d\n", + inode, pos, (unsigned)iov->iov_len, got); + + if ((got & CEPH_CAP_WRBUFFER) == 0 || + (inode->i_sb->s_flags & MS_SYNCHRONOUS)) + /* fixme, this isn't async */ + ret = ceph_sync_write(file, iov->iov_base, iov->iov_len, &pos); + else + ret = generic_file_aio_write(iocb, iov, nr_segs, pos); + +out: + dout(10, "aio_write %p %llu~%u dropping cap refs on %d\n", + inode, pos, (unsigned)iov->iov_len, got); + ceph_put_cap_refs(ci, got); + return ret; +} + static int ceph_fsync(struct file *file, struct dentry *dentry, int datasync) { struct inode *inode = dentry->d_inode; @@ -347,13 +393,15 @@ static int ceph_fsync(struct file *file, struct dentry *dentry, int datasync) return ret; /* - * fixme: also ensure that caps are flushed to mds + * fixme: also ensure that caps are flushed to mds? + * well, not strictly necessary, since with the data on the osds + * the mds can always reconstruct the file size. */ return 0; } -static int ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +static long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { return -EINVAL; } @@ -365,7 +413,7 @@ const struct file_operations ceph_file_fops = { .read = ceph_read, .write = ceph_write, .aio_read = generic_file_aio_read, - .aio_write = generic_file_aio_write, + .aio_write = ceph_aio_write, .mmap = generic_file_mmap, .fsync = ceph_fsync, .splice_read = generic_file_splice_read, -- 2.39.5