git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
kclient: independently track size of address space
author: Sage Weil <sage@newdream.net>
Thu, 4 Jun 2009 23:55:37 +0000 (16:55 -0700)
committer: Sage Weil <sage@newdream.net>
Thu, 4 Jun 2009 23:55:37 +0000 (16:55 -0700)
We do vmtruncate asynchronously, but must update i_size immediately.
Deferring the vmtruncate is partly to avoid dropping dirty snapped pages.
However, writepages can't use i_size, because that will reflect the
truncated size, not the size of the pre-truncation snapped state.  So
track the size of the address space independently, in i_as_size, and
use that for writeback.

src/kernel/addr.c
src/kernel/inode.c
src/kernel/super.h

index 4ee5d75fceffca4b43020c30499f2e696afba170..07265f36cd7b7e549d7d1ec24b68af3b1bc455c8 100644 (file)
@@ -403,9 +403,11 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
        osdc = &ceph_inode_to_client(inode)->osdc;
 
        /* is this a partial page at end of file? */
-       i_size = i_size_read(inode);
+       spin_lock(&inode->i_lock);
+       i_size = ci->i_as_size;
        if (i_size < page_off + len)
                len = i_size - page_off;
+       spin_unlock(&inode->i_lock);
        dout(10, "writepage %p page %p index %lu on %llu~%u\n",
             inode, page, page->index, page_off, len);
 
@@ -641,9 +643,9 @@ retry:
                int pvec_pages, locked_pages;
                struct page *page;
                int want;
-               u64 offset, len;
+               u64 offset, len, as_size;
                struct ceph_osd_request_head *reqhead;
-               struct ceph_osd_op *op;
+               struct ceph_osd_op *op;         
 
                next = 0;
                locked_pages = 0;
@@ -695,9 +697,11 @@ get_more_pages:
                                dout(20, "waiting on writeback %p\n", page);
                                wait_on_page_writeback(page);
                        }
-                       if (page_offset(page) >= i_size_read(inode)) {
-                               dout(20, "%p past eof %llu\n", page,
-                                    i_size_read(inode));
+                       spin_lock(&inode->i_lock);
+                       as_size = ci->i_as_size;
+                       spin_unlock(&inode->i_lock);
+                       if (page_offset(page) >= as_size) {
+                               dout(20, "%p > as_size %llu\n", page, as_size);
                                done = 1;
                                unlock_page(page);
                                break;
@@ -789,8 +793,10 @@ get_more_pages:
 
                /* submit the write */
                offset = req->r_pages[0]->index << PAGE_CACHE_SHIFT;
-               len = min(i_size_read(inode) - offset,
+               spin_lock(&inode->i_lock);
+               len = min(ci->i_as_size - offset,
                          (u64)locked_pages << PAGE_CACHE_SHIFT);
+               spin_unlock(&inode->i_lock);
                dout(10, "writepages got %d pages at %llu~%llu\n",
                     locked_pages, offset, len);
 
index 3ed15068218c0be57b1a9a506167957ba31ce0ec..8c2c43d6f42083d7e2f443302f96220ecdd37e97 100644 (file)
@@ -288,6 +288,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
        for (i = 0; i < CEPH_FILE_MODE_NUM; i++)
                ci->i_nr_by_mode[i] = 0;
 
+       ci->i_as_size = 0;
        ci->i_truncate_seq = 0;
        ci->i_truncate_size = 0;
        ci->i_truncate_pending = 0;
@@ -364,6 +365,8 @@ int ceph_fill_file_size(struct inode *inode, int issued,
                inode->i_size = size;
                inode->i_blocks = (size + (1<<9) - 1) >> 9;
                ci->i_reported_size = size;
+               if (ci->i_as_size < size)
+                       ci->i_as_size = size;
                if (truncate_seq != ci->i_truncate_seq) {
                        dout(10, "truncate_seq %u -> %u\n",
                             ci->i_truncate_seq, truncate_seq);
@@ -1153,6 +1156,8 @@ int ceph_inode_set_size(struct inode *inode, loff_t size)
        dout(30, "set_size %p %llu -> %llu\n", inode, inode->i_size, size);
        inode->i_size = size;
        inode->i_blocks = (size + (1 << 9) - 1) >> 9;
+       if (ci->i_as_size < size)
+               ci->i_as_size = size;
 
        /* tell the MDS if we are approaching max_size */
        if ((size << 1) >= ci->i_max_size &&
@@ -1256,12 +1261,27 @@ void __ceph_do_pending_vmtruncate(struct inode *inode)
        u64 to;
        int wrbuffer_refs, wake = 0;
 
+retry:
        spin_lock(&inode->i_lock);
        if (ci->i_truncate_pending == 0) {
                dout(10, "__do_pending_vmtruncate %p none pending\n", inode);
                spin_unlock(&inode->i_lock);
                return;
        }
+
+       /*
+        * make sure any dirty snapped pages are flushed before we
+        * possibly truncate them.. so write AND block!
+        */
+       if (ci->i_wrbuffer_ref_head < ci->i_wrbuffer_ref) {
+               dout(10, "__do_pending_vmtruncate %p flushing snaps first\n",
+                    inode);
+               spin_unlock(&inode->i_lock);
+               filemap_write_and_wait_range(&inode->i_data, 0,
+                                            CEPH_FILE_MAX_SIZE);
+               goto retry;
+       }
+
        to = ci->i_truncate_size;
        wrbuffer_refs = ci->i_wrbuffer_ref;
        dout(10, "__do_pending_vmtruncate %p (%d) to %lld\n", inode,
@@ -1271,6 +1291,7 @@ void __ceph_do_pending_vmtruncate(struct inode *inode)
        truncate_inode_pages(inode->i_mapping, to);
 
        spin_lock(&inode->i_lock);
+       ci->i_as_size = to;
        ci->i_truncate_pending--;
        if (ci->i_truncate_pending == 0)
                wake = 1;
@@ -1282,6 +1303,7 @@ void __ceph_do_pending_vmtruncate(struct inode *inode)
                wake_up(&ci->i_cap_wq);
 }
 
+
 /*
  * symlinks
  */
@@ -1425,10 +1447,14 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
                }
                if ((issued & CEPH_CAP_FILE_EXCL) &&
                    attr->ia_size > inode->i_size) {
-                       ci->i_truncate_size = attr->ia_size;
-                       ci->i_truncate_pending++;
-                       queue_trunc = 1;
                        inode->i_size = attr->ia_size;
+                       if (ci->i_as_size < attr->ia_size) {
+                               ci->i_as_size = attr->ia_size;
+                       } else {
+                               ci->i_truncate_size = attr->ia_size;
+                               ci->i_truncate_pending++;
+                               queue_trunc = 1;
+                       }
                        inode->i_blocks =
                                (attr->ia_size + (1 << 9) - 1) >> 9;
                        inode->i_ctime = attr->ia_ctime;
index 9bd3a7970c2b55adadb03b9bd9595bdd4b0a0b72..3921a6e6931a8ab47ef4d76a9c7c2add55ec2b63 100644 (file)
@@ -320,6 +320,7 @@ struct ceph_inode_info {
 
        int i_nr_by_mode[CEPH_FILE_MODE_NUM];  /* open file counts */
 
+       loff_t i_as_size;          /* address space size (pre-truncation) */
        u32 i_truncate_seq;        /* last truncate to smaller size */
        u64 i_truncate_size;       /*  and the size we last truncated down to */
        int i_truncate_pending;    /*  still need to call vmtruncate */