From c2f5b52e95e52e5a2d6603a6fe7d507b6190ebfa Mon Sep 17 00:00:00 2001
From: Sage Weil
Date: Mon, 31 Mar 2008 16:37:57 -0700
Subject: [PATCH] kclient: write_begin and write_end

---
Review notes (commentary only):
 - Line structure was reconstructed from a whitespace-collapsed copy and
   checked against the @@ hunk counts; exact indentation (notably in the
   whitespace-only ceph_writepage hunks) is a best guess.
 - ceph_set_page_dirty() and ceph_releasepage() now release i_lock with
   spin_unlock(); the previous text called spin_lock() a second time.
 - Not changed here, please confirm: ceph_write_begin() returns the
   ceph_osdc_readpage() error without unlocking/releasing the page it
   grabbed, and ceph_write_end() unlocks the page but does not drop a
   page reference.

 src/TODO           |   2 -
 src/kernel/addr.c  | 174 ++++++++++++++++++++++++++++++++++++++-------
 src/kernel/inode.c |  14 +++-
 src/kernel/super.c |   1 +
 src/kernel/super.h |   4 +-
 5 files changed, 164 insertions(+), 31 deletions(-)

diff --git a/src/TODO b/src/TODO
index 5f29b371e3ed5..2d371b329b86f 100644
--- a/src/TODO
+++ b/src/TODO
@@ -16,8 +16,6 @@ userspace client
 - reference count lease validations on path lookup?
 
 kernel client
-- use list_for_each_safe for caps removal?
-  - revisit cap removal locking, make sure it's okay....
 - trim expired leases so we don't indefinitely hold dcache refs...
 - carry wrbuffer/rdcache caps until data is flushed
   - this should make the utimes bit kick in
diff --git a/src/kernel/addr.c b/src/kernel/addr.c
index 1cc7a2ecef25f..9809b968eeb04 100644
--- a/src/kernel/addr.c
+++ b/src/kernel/addr.c
@@ -62,30 +62,30 @@ out_unlock:
  */
 static int ceph_writepage(struct page *page, struct writeback_control *wbc)
 {
-        struct inode *inode = page->mapping->host;
-        struct ceph_inode_info *ci;
-        struct ceph_osd_client *osdc;
-        int err = 0;
-        
-        if (!page->mapping || !page->mapping->host)
-                return -EFAULT;
-        
-        ci = ceph_inode(inode);
-        osdc = &ceph_inode_to_client(inode)->osdc;
-        
-        get_page(page);
-        set_page_writeback(page);
-        SetPageUptodate(page);
-        
-        dout(10, "ceph_writepage inode %p page %p index %lu\n",
-             inode, page, page->index);
-        
+	struct inode *inode = page->mapping->host;
+	struct ceph_inode_info *ci;
+	struct ceph_osd_client *osdc;
+	int err = 0;
+
+	if (!page->mapping || !page->mapping->host)
+		return -EFAULT;
+
+	ci = ceph_inode(inode);
+	osdc = &ceph_inode_to_client(inode)->osdc;
+
+	get_page(page);
+	set_page_writeback(page);
+	SetPageUptodate(page);
+
+	dout(10, "ceph_writepage inode %p page %p index %lu\n",
+	     inode, page, page->index);
+
 	/* write a page at the index of page->index, by size of PAGE_SIZE */
 	err = ceph_osdc_writepage(osdc, ceph_ino(inode), &ci->i_layout,
-                                  page->index << PAGE_SHIFT, PAGE_SIZE, page);
+				  page->index << PAGE_SHIFT, PAGE_SIZE, page);
 	if (err)
 		goto out_unlock;
-        
+
 	/* update written data size in ceph_inode_info */
 	spin_lock(&inode->i_lock);
 	if (inode->i_size <= PAGE_SIZE) {
@@ -94,12 +94,12 @@ static int ceph_writepage(struct page *page, struct writeback_control *wbc)
 		dout(10, "extending file size to %d\n", (int)inode->i_size);
 	}
 	spin_unlock(&inode->i_lock);
-        
+
 out_unlock:
 	end_page_writeback(page);
 	put_page(page);
-        
-        return err;
+
+	return err;
 }
 
 /*
@@ -125,6 +125,7 @@ static int ceph_writepages(struct address_space *mapping, struct writeback_contr
 	return generic_writepages(mapping, wbc);
 }
 
+
 /*
  * ceph_prepare_write:
  *  allocate and initialize buffer heads for each page
@@ -233,6 +234,7 @@ static int ceph_commit_write(struct file *filp, struct page *page,
 		/* set the page as up-to-date and mark it as dirty */
 		SetPageUptodate(page);
 		set_page_dirty(page);
+		ci->i_nr_dirty_pages++;
 	}
 
 /*out_unlock:*/
@@ -240,11 +242,133 @@ static int ceph_commit_write(struct file *filp, struct page *page,
 }
 
 
+/*
+ * newer write interface
+ */
+static int ceph_write_begin(struct file *file, struct address_space *mapping,
+			    loff_t pos, unsigned len, unsigned flags,
+			    struct page **pagep, void **fsdata)
+{
+	struct inode *inode = file->f_dentry->d_inode;
+	struct ceph_inode_info *ci;
+	struct ceph_osd_client *osdc = &ceph_inode_to_client(inode)->osdc;
+	struct page *page;
+	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+	loff_t page_off = pos & PAGE_MASK;
+	int pos_in_page = pos & ~PAGE_MASK;
+	loff_t i_size;
+	int r;
+
+	/* get a page*/
+	page = __grab_cache_page(mapping, index);
+	if (!page)
+		return -ENOMEM;
+	*pagep = page;
+
+	dout(10, "write_begin file %p inode %p page %p %d~%d\n", file,
+	     inode, page, (int)pos, (int)len);
+
+	if (PageUptodate(page))
+		return 0;
+
+	/* full page? */
+	if (pos_in_page == 0 && len == PAGE_SIZE) {
+		SetPageUptodate(page);
+		return 0;
+	}
+
+	/* past end of file? */
+	i_size = i_size_read(inode);
+	if (page_off >= i_size ||
+	    (pos_in_page == 0 && (pos+len) >= i_size)) {
+		simple_prepare_write(file, page, pos_in_page, pos_in_page+len);
+		SetPageUptodate(page);
+		return 0;
+	}
+
+	/* we need to read it. */
+	/* or, do sub-page granularity dirty accounting? */
+	/* try to read the full page */
+	ci = ceph_inode(inode);
+	r = ceph_osdc_readpage(osdc, ceph_ino(inode), &ci->i_layout,
+			       page_off, PAGE_SIZE, page);
+	if (r < 0)
+		return r;
+	if (r < pos_in_page) {
+		/* we didn't read up to our write start pos, zero the gap */
+		void *kaddr = kmap_atomic(page, KM_USER0);
+		memset(kaddr+r, 0, pos_in_page-r);
+		flush_dcache_page(page);
+		kunmap_atomic(kaddr, KM_USER0);
+	}
+	return 0;
+}
+
+static int ceph_write_end(struct file *file, struct address_space *mapping,
+			  loff_t pos, unsigned len, unsigned copied,
+			  struct page *page, void *fsdata)
+{
+	struct inode *inode = file->f_dentry->d_inode;
+	unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
+
+	dout(10, "write_end file %p inode %p page %p %d~%d (%d)\n", file,
+	     inode, page, (int)pos, (int)copied, (int)len);
+
+	/* did file size increase? */
+	spin_lock(&inode->i_lock);
+	if (pos+copied > inode->i_size)
+		i_size_write(inode, pos + copied);
+	spin_unlock(&inode->i_lock);
+
+	SetPageUptodate(page);
+	set_page_dirty(page);
+	unlock_page(page);
+
+	return copied;
+}
+
+
+
+/* generic_perform_write
+ * page accounting
+ */
+
+static int ceph_set_page_dirty(struct page *page)
+{
+	struct ceph_inode_info *ci = ceph_inode(page->mapping->host);
+	spin_lock(&ci->vfs_inode.i_lock);
+	dout(10, "set_page_dirty %p : %d -> %d \n", page,
+	     ci->i_nr_dirty_pages, ci->i_nr_dirty_pages + 1);
+	ci->i_nr_dirty_pages++;
+	spin_unlock(&ci->vfs_inode.i_lock);
+	return 0;
+}
+
+static int ceph_releasepage(struct page *page, gfp_t gfpmask)
+{
+	struct ceph_inode_info *ci = ceph_inode(page->mapping->host);
+	int last = 0;
+	spin_lock(&ci->vfs_inode.i_lock);
+	dout(10, "releasepage %p gfpmask %d : %d -> %d \n", page, gfpmask,
+	     ci->i_nr_pages, ci->i_nr_pages - 1);
+	if (--ci->i_nr_pages == 0)
+		last++;
+	spin_unlock(&ci->vfs_inode.i_lock);
+	if (last)
+		ceph_check_caps_wanted(ci, gfpmask);
+	return 0;
+}
+
+
 const struct address_space_operations ceph_aops = {
 	.readpage = ceph_readpage,
 	.readpages = ceph_readpages,
-	.prepare_write = ceph_prepare_write,
-	.commit_write = ceph_commit_write,
+	.write_begin = ceph_write_begin,
+	.write_end = ceph_write_end,
+	//.prepare_write = ceph_prepare_write,
+	//.commit_write = ceph_commit_write,
 	.writepage = ceph_writepage,
 //	.writepages = ceph_writepages,
+//	.set_page_dirty = ceph_set_page_dirty,
+	.releasepage = ceph_releasepage,
 };
diff --git a/src/kernel/inode.c b/src/kernel/inode.c
index abb24b30b468c..52d4fa023c59c 100644
--- a/src/kernel/inode.c
+++ b/src/kernel/inode.c
@@ -689,7 +689,7 @@ void ceph_remove_cap(struct ceph_inode_cap *cap)
  * examine currently wanted versus held caps, and release caps to mds
  * as appropriate.
  */
-void ceph_check_caps_wanted(struct ceph_inode_info *ci)
+void ceph_check_caps_wanted(struct ceph_inode_info *ci, gfp_t gfpmask)
 {
 	struct ceph_client *client = ceph_inode_to_client(&ci->vfs_inode);
 	struct ceph_mds_client *mdsc = &client->mdsc;
@@ -711,6 +711,13 @@ retry:
 		if ((cap->caps & ~wanted) == 0)
 			continue;     /* nothing extra, all good */
 
+		if (gfpmask != GFP_KERNEL) {
+			/* put on examine list */
+			dout(10, "** dropping caps, but bad gfpmask, "
+			     "IMPLEMENT ME *************\n");
+			goto out;
+		}
+
 		cap->caps &= wanted;  /* drop bits we don't want */
 		keep = cap->caps;
 
@@ -734,6 +741,7 @@ retry:
 	}
 
 	/* okay */
+out:
 	spin_unlock(&ci->vfs_inode.i_lock);
 }
 
@@ -758,7 +766,7 @@ void ceph_put_mode(struct ceph_inode_info *ci, int mode)
 	spin_unlock(&ci->vfs_inode.i_lock);
 
 	if (last)
-		ceph_check_caps_wanted(ci);
+		ceph_check_caps_wanted(ci, GFP_KERNEL);
 }
 
 
@@ -944,7 +952,7 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
 	spin_unlock(&ci->vfs_inode.i_lock);
 
 	if (last)
-		ceph_check_caps_wanted(ci);
+		ceph_check_caps_wanted(ci, GFP_KERNEL);
 }
 
 
diff --git a/src/kernel/super.c b/src/kernel/super.c
index a32ab1b244c8a..297498164dca5 100644
--- a/src/kernel/super.c
+++ b/src/kernel/super.c
@@ -132,6 +132,7 @@ static struct inode *ceph_alloc_inode(struct super_block *sb)
 
 	ci->i_rd_ref = ci->i_rdcache_ref = 0;
 	ci->i_wr_ref = ci->i_wrbuffer_ref = 0;
+	ci->i_nr_pages = ci->i_nr_dirty_pages = 0;
 
 	ci->i_hashval = 0;
 
diff --git a/src/kernel/super.h b/src/kernel/super.h
index ef7f13123a87c..dc6043c026e31 100644
--- a/src/kernel/super.h
+++ b/src/kernel/super.h
@@ -166,6 +166,8 @@ struct ceph_inode_info {
 
 	int i_rd_ref, i_rdcache_ref, i_wr_ref, i_wrbuffer_ref;
 
+	int i_nr_pages, i_nr_dirty_pages;  // hrm!
+
 	unsigned long i_hashval;
 
 	struct inode vfs_inode; /* at end */
@@ -361,7 +363,7 @@ extern int ceph_handle_cap_trunc(struct inode *inode,
 				 struct ceph_mds_session *session);
 extern int ceph_get_cap_refs(struct ceph_inode_info *ci, int need, int want, int *got);
 extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had);
-extern void ceph_check_caps_wanted(struct ceph_inode_info *ci);
+extern void ceph_check_caps_wanted(struct ceph_inode_info *ci, gfp_t gfpmask);
 extern void ceph_get_mode(struct ceph_inode_info *ci, int mode);
 extern void ceph_put_mode(struct ceph_inode_info *ci, int mode);
 
-- 
2.39.5