From 2ab1bf30635e319490e770bc7bc668075df5da14 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 4 Jun 2009 18:00:50 -0700 Subject: [PATCH] kclient: use capsnap to get snapped file size --- src/kernel/addr.c | 64 +++++++++++++++++++++++++--------------------- src/kernel/inode.c | 10 +------- src/kernel/super.h | 2 +- 3 files changed, 37 insertions(+), 39 deletions(-) diff --git a/src/kernel/addr.c b/src/kernel/addr.c index 07265f36cd7b7..44e24d72556ca 100644 --- a/src/kernel/addr.c +++ b/src/kernel/addr.c @@ -339,7 +339,8 @@ out: * * Caller holds i_lock. */ -static struct ceph_snap_context *__get_oldest_context(struct inode *inode) +static struct ceph_snap_context *__get_oldest_context(struct inode *inode, + u64 *snap_size) { struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_snap_context *snapc = NULL; @@ -350,12 +351,14 @@ static struct ceph_snap_context *__get_oldest_context(struct inode *inode) capsnap = list_entry(p, struct ceph_cap_snap, ci_item); dout(20, " cap_snap %p snapc %p has %d dirty pages\n", capsnap, capsnap->context, capsnap->dirty_pages); - if (capsnap->dirty_pages) + if (capsnap->dirty_pages) { + snapc = ceph_get_snap_context(capsnap->context); + if (snap_size) + *snap_size = capsnap->size; break; + } } - if (capsnap && capsnap->dirty_pages) { - snapc = ceph_get_snap_context(capsnap->context); - } else if (ci->i_snap_realm) { + if (!snapc && ci->i_snap_realm) { snapc = ceph_get_snap_context(ci->i_snap_realm->cached_context); dout(20, " head snapc %p has %d dirty pages\n", snapc, ci->i_wrbuffer_ref_head); @@ -363,12 +366,13 @@ static struct ceph_snap_context *__get_oldest_context(struct inode *inode) return snapc; } -static struct ceph_snap_context *get_oldest_context(struct inode *inode) +static struct ceph_snap_context *get_oldest_context(struct inode *inode, + u64 *snap_size) { struct ceph_snap_context *snapc = NULL; spin_lock(&inode->i_lock); - snapc = __get_oldest_context(inode); + snapc = __get_oldest_context(inode, snap_size); spin_unlock(&inode->i_lock); return snapc; } @@ -391,6 +395,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) loff_t i_size; int err = 0; struct ceph_snap_context *snapc; + u64 snap_size = 0; dout(10, "writepage %p idx %lu\n", page, page->index); @@ -402,22 +407,13 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) ci = ceph_inode(inode); osdc = &ceph_inode_to_client(inode)->osdc; - /* is this a partial page at end of file? */ - spin_lock(&inode->i_lock); - i_size = ci->i_as_size; - if (i_size < page_off + len) - len = i_size - page_off; - spin_unlock(&inode->i_lock); - dout(10, "writepage %p page %p index %lu on %llu~%u\n", - inode, page, page->index, page_off, len); - /* verify this is a writeable snap context */ snapc = (void *)page->private; if (snapc == NULL) { dout(20, "writepage %p page %p not dirty?\n", inode, page); goto out; } - if (snapc != get_oldest_context(inode)) { + if (snapc != get_oldest_context(inode, &snap_size)) { dout(10, "writepage %p page %p snapc %p not writeable - noop\n", inode, page, (void *)page->private); /* we should only noop if called by kswapd */ @@ -425,6 +421,17 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) goto out; } + /* is this a partial page at end of file? */ + if (snap_size) + i_size = snap_size; + else + i_size = i_size_read(inode); + if (i_size < page_off + len) + len = i_size - page_off; + + dout(10, "writepage %p page %p index %lu on %llu~%u\n", + inode, page, page->index, page_off, len); + set_page_writeback(page); err = ceph_osdc_writepages(osdc, ceph_vino(inode), &ci->i_layout, snapc, @@ -565,6 +572,7 @@ static int ceph_writepages_start(struct address_space *mapping, unsigned wsize = 1 << inode->i_blkbits; struct ceph_osd_request *req = NULL; int do_sync; + u64 snap_size = 0; /* * Include a 'sync' in the OSD request if this is a data @@ -618,7 +626,7 @@ static int ceph_writepages_start(struct address_space *mapping, retry: /* find oldest snap context with dirty data */ ceph_put_snap_context(snapc); - snapc = get_oldest_context(inode); + snapc = get_oldest_context(inode, &snap_size); if (!snapc) { /* hmm, why does writepages get called when there is no dirty data? */ @@ -643,7 +651,7 @@ retry: int pvec_pages, locked_pages; struct page *page; int want; - u64 offset, len, as_size; + u64 offset, len; struct ceph_osd_request_head *reqhead; struct ceph_osd_op *op; @@ -697,11 +705,11 @@ get_more_pages: dout(20, "waiting on writeback %p\n", page); wait_on_page_writeback(page); } - spin_lock(&inode->i_lock); - as_size = ci->i_as_size; - spin_unlock(&inode->i_lock); - if (page_offset(page) >= as_size) { - dout(20, "%p > as_size %llu\n", page, as_size); + if ((snap_size && page_offset(page) > snap_size) || + (!snap_size && + page_offset(page) > i_size_read(inode))) { + dout(20, "%p page eof %llu\n", page, snap_size ? + snap_size : i_size_read(inode)); done = 1; unlock_page(page); break; @@ -793,10 +801,8 @@ get_more_pages: /* submit the write */ offset = req->r_pages[0]->index << PAGE_CACHE_SHIFT; - spin_lock(&inode->i_lock); - len = min(ci->i_as_size - offset, + len = min((snap_size ? snap_size : i_size_read(inode)) - offset, (u64)locked_pages << PAGE_CACHE_SHIFT); - spin_unlock(&inode->i_lock); dout(10, "writepages got %d pages at %llu~%llu\n", locked_pages, offset, len); @@ -860,7 +866,7 @@ out_free: static int context_is_writeable_or_written(struct inode *inode, struct ceph_snap_context *snapc) { - struct ceph_snap_context *oldest = get_oldest_context(inode); + struct ceph_snap_context *oldest = get_oldest_context(inode, NULL); return !oldest || snapc->seq <= oldest->seq; } @@ -912,7 +918,7 @@ retry_locked: * this page is already dirty in another (older) snap * context! is it writeable now? */ - snapc = get_oldest_context(inode); + snapc = get_oldest_context(inode, NULL); up_read(&mdsc->snap_rwsem); if (snapc != (void *)page->private) { diff --git a/src/kernel/inode.c b/src/kernel/inode.c index 8c2c43d6f4208..9fe9ae1523c7d 100644 --- a/src/kernel/inode.c +++ b/src/kernel/inode.c @@ -288,7 +288,6 @@ struct inode *ceph_alloc_inode(struct super_block *sb) for (i = 0; i < CEPH_FILE_MODE_NUM; i++) ci->i_nr_by_mode[i] = 0; - ci->i_as_size = 0; ci->i_truncate_seq = 0; ci->i_truncate_size = 0; ci->i_truncate_pending = 0; @@ -365,8 +364,6 @@ int ceph_fill_file_size(struct inode *inode, int issued, inode->i_size = size; inode->i_blocks = (size + (1<<9) - 1) >> 9; ci->i_reported_size = size; - if (ci->i_as_size < size) - ci->i_as_size = size; if (truncate_seq != ci->i_truncate_seq) { dout(10, "truncate_seq %u -> %u\n", ci->i_truncate_seq, truncate_seq); @@ -1156,8 +1153,6 @@ int ceph_inode_set_size(struct inode *inode, loff_t size) dout(30, "set_size %p %llu -> %llu\n", inode, inode->i_size, size); inode->i_size = size; inode->i_blocks = (size + (1 << 9) - 1) >> 9; - if (ci->i_as_size < size) - ci->i_as_size = size; /* tell the MDS if we are approaching max_size */ if ((size << 1) >= ci->i_max_size && @@ -1291,7 +1286,6 @@ retry: truncate_inode_pages(inode->i_mapping, to); spin_lock(&inode->i_lock); - ci->i_as_size = to; ci->i_truncate_pending--; if (ci->i_truncate_pending == 0) wake = 1; @@ -1448,9 +1442,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) if ((issued & CEPH_CAP_FILE_EXCL) && attr->ia_size > inode->i_size) { inode->i_size = attr->ia_size; - if (ci->i_as_size < attr->ia_size) { - ci->i_as_size = attr->ia_size; - } else { + if (attr->ia_size < inode->i_size) { ci->i_truncate_size = attr->ia_size; ci->i_truncate_pending++; queue_trunc = 1; diff --git a/src/kernel/super.h b/src/kernel/super.h index 3921a6e6931a8..4651284acef19 100644 --- a/src/kernel/super.h +++ b/src/kernel/super.h @@ -320,7 +320,6 @@ struct ceph_inode_info { int i_nr_by_mode[CEPH_FILE_MODE_NUM]; /* open file counts */ - loff_t i_as_size; /* address space size (pre-truncation) */ u32 i_truncate_seq; /* last truncate to smaller size */ u64 i_truncate_size; /* and the size we last truncated down to */ int i_truncate_pending; /* still need to call vmtruncate */ @@ -804,6 +803,7 @@ extern int ceph_inode_set_size(struct inode *inode, loff_t size); extern void ceph_inode_writeback(struct work_struct *work); extern void ceph_vmtruncate_work(struct work_struct *work); extern void __ceph_do_pending_vmtruncate(struct inode *inode); +extern void __ceph_queue_vmtruncate(struct inode *inode); extern int ceph_do_getattr(struct inode *inode, int mask); extern int ceph_permission(struct inode *inode, int mask); -- 2.39.5