git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
kclient: dirty page accounting hell
author Sage Weil <sage@newdream.net>
Sat, 3 May 2008 02:59:39 +0000 (19:59 -0700)
committer Sage Weil <sage@newdream.net>
Sat, 3 May 2008 02:59:39 +0000 (19:59 -0700)
src/kernel/addr.c
src/kernel/inode.c
src/kernel/mds_client.c

index d727a35817479d11fb3e02d62b00709283344f9a..53c8295cd9556a285e38f7ab51909fab7350d7e9 100644 (file)
@@ -290,7 +290,7 @@ get_more_pages:
                        set_page_writeback(page);
                        cleaned++;
 
-                       dout(20, "%p locked+cleaned page %p idx %lu\n",
+                       derr(20, "%p locked+cleaned page %p idx %lu\n",
                             inode, page, page->index);
                        
                        if (pages)
@@ -351,7 +351,7 @@ get_more_pages:
                                if (i < wrote)
                                        SetPageUptodate(page);
                                else if (rc < 0) {
-                                       dout(20, "%p redirtying page %p\n", 
+                                       derr(20, "%p redirtying page %p\n", 
                                             inode, page);
                                        redirty_page_for_writepage(wbc, page);
                                }
@@ -512,7 +512,8 @@ static int ceph_set_page_dirty(struct page *page)
                return !TestSetPageDirty(page);
 
        if (TestSetPageDirty(page)) {
-               dout(20, "set_page_dirty %p -- already dirty\n", page);
+               dout(20, "%p set_page_dirty %p -- already dirty\n", 
+                    mapping->host, page);
                return 0;
        }
 
@@ -531,8 +532,13 @@ static int ceph_set_page_dirty(struct page *page)
 
                ci = ceph_inode(mapping->host);
                atomic_inc(&ci->i_wrbuffer_ref);
-               dout(20, "set_page_dirty %p %p %d -> %d (?)\n", page,
-                    &ci->vfs_inode,
+               /*
+                * set PagePrivate so that we get invalidatepage callback
+                * on truncate for proper dirty page accounting for mmap
+                */
+               SetPagePrivate(page);
+               derr(20, "%p set_page_dirty %p %d -> %d (?)\n", 
+                    mapping->host, page,
                     atomic_read(&ci->i_wrbuffer_ref)-1,
                     atomic_read(&ci->i_wrbuffer_ref));
        }
@@ -542,6 +548,36 @@ static int ceph_set_page_dirty(struct page *page)
        return 1;
 }
 
+/*
+ * Drop the dirty-page accounting for a page that is being invalidated
+ * (e.g. by truncate).
+ *
+ * @offset is the byte offset within @page at which invalidation starts;
+ * 0 means the whole page is going away.  Only in that case do we drop
+ * the i_wrbuffer_ref taken in ceph_set_page_dirty and clear the dirty
+ * bit.  A partially invalidated page still holds dirty data that must
+ * be written back, so it keeps both its dirty bit and its reference.
+ */
+void ceph_invalidatepage(struct page *page, unsigned long offset)
+{
+       struct ceph_inode_info *ci;
+
+       ClearPagePrivate(page);
+       if (!PageDirty(page))
+               return;
+       if (!page->mapping)
+               return;
+       ci = ceph_inode(page->mapping->host);
+       /*
+        * offset is relative to the start of the page, not the file, so
+        * it must be compared against 0 rather than the page's file
+        * position; otherwise every page with index > 0 looks "full".
+        */
+       if (offset == 0) {
+               derr(20, "%p invalidatepage %p idx %lu full dirty page\n", 
+                    &ci->vfs_inode, page, page->index);
+               atomic_dec(&ci->i_wrbuffer_ref);
+               /*
+                * pretty sure this is fundamentally racy.  help!
+                */
+               ClearPageDirty(page);
+       } else {
+               derr(20, "%p invalidatepage %p idx %lu partial dirty page\n", 
+                    &ci->vfs_inode, page, page->index);
+       }
+}
+
+/*
+ * releasepage callback: log the request, warn if the page is still
+ * dirty, and always return 0.
+ */
+int ceph_releasepage(struct page *page, gfp_t g)
+{
+       struct inode *inode = NULL;
+
+       /* the page may no longer be attached to a mapping */
+       if (page->mapping)
+               inode = page->mapping->host;
+
+       dout(20, "%p releasepage %p\n", inode, page);
+       WARN_ON(PageDirty(page));
+       return 0;
+}
 
 const struct address_space_operations ceph_aops = {
        .readpage = ceph_readpage,
@@ -551,4 +587,6 @@ const struct address_space_operations ceph_aops = {
        .write_begin = ceph_write_begin,
        .write_end = ceph_write_end,
        .set_page_dirty = ceph_set_page_dirty,
+       .invalidatepage = ceph_invalidatepage,
+       .releasepage = ceph_releasepage,
 };
index cd35a56c5eb1c8078e7f025bbadaae2a9faa0aeb..723cfc734ea59c3e2f88359be8b8b96a1c5cf32a 100644 (file)
@@ -1061,6 +1061,7 @@ void ceph_inode_writeback(struct work_struct *work)
        write_inode_now(&ci->vfs_inode, 0);
 }
 
+
 /*
  * called by setattr
  */
@@ -1075,6 +1076,8 @@ static int apply_truncate(struct inode *inode, loff_t size)
                ci->i_reported_size = size;
                spin_unlock(&inode->i_lock);
        }
+       if (atomic_read(&ci->i_wrbuffer_ref) == 0)
+               ceph_check_caps(ci, 0);
        return rc;
 }
 
@@ -1106,6 +1109,8 @@ void __ceph_do_pending_vmtruncate(struct inode *inode)
        if (to >= 0) {
                dout(10, "__do_pending_vmtruncate %p to %lld\n", inode, to);
                vmtruncate(inode, to);
+               if (atomic_read(&ci->i_wrbuffer_ref) == 0)
+                       ceph_check_caps(ci, 0);
        } else
                dout(10, "__do_pending_vmtruncate %p nothing to do\n", inode);
 }
@@ -1238,19 +1243,19 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
 
 void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr)
 {
-       int last = 0;
+       int was_last;
        int v;
 
        spin_lock(&ci->vfs_inode.i_lock);
-       last = atomic_sub_and_test(nr, &ci->i_wrbuffer_ref);
+       was_last = atomic_sub_and_test(nr, &ci->i_wrbuffer_ref);
        v = atomic_read(&ci->i_wrbuffer_ref);
        spin_unlock(&ci->vfs_inode.i_lock);
 
        dout(30, "put_wrbuffer_cap_refs on %p %d -> %d (?)%s\n",
-            &ci->vfs_inode, v+nr, v, last == 0 ? " LAST":"");
-       BUG_ON(v < 0);
+            &ci->vfs_inode, v+nr, v, was_last ? " LAST":"");
+       WARN_ON(v < 0);
 
-       if (last == 0)
+       if (was_last)
                ceph_check_caps(ci, 0);
 }
 
index f42c2d20a5e7516ec3ffb76412ac1b755d083bea..e65f82bdebabb5bfddfc7628c49d4aa6dc5c3347 100644 (file)
@@ -1566,7 +1566,8 @@ static void check_delayed_caps(struct ceph_mds_client *mdsc)
 }
 
 static void flush_write_caps(struct ceph_mds_client *mdsc,
-                            struct ceph_mds_session *session)
+                            struct ceph_mds_session *session, 
+                            int purge)
 {
        struct list_head *p, *n;
        
@@ -1576,14 +1577,24 @@ static void flush_write_caps(struct ceph_mds_client *mdsc,
                struct inode *inode = &cap->ci->vfs_inode;
                int used, wanted;
 
+               /* invalidate any dirty remaining pages */
+               __ceph_do_pending_vmtruncate(inode);
+
                spin_lock(&inode->i_lock);
                if ((cap->implemented & (CEPH_CAP_WR|CEPH_CAP_WRBUFFER)) == 0) {
                        spin_unlock(&inode->i_lock);
                        continue;
                }
+
                used = __ceph_caps_used(cap->ci);
                wanted = __ceph_caps_wanted(cap->ci);
 
+               if (purge && (used || wanted)) {
+                       derr(0, "residual caps on %p used %d wanted %d %llu\n", 
+                            inode, used, wanted, inode->i_size);
+                       used = wanted = 0;
+               }
+
                __ceph_mdsc_send_cap(mdsc, session, cap, used, wanted, 1);
        }
 }
@@ -1600,7 +1611,7 @@ static int close_session(struct ceph_mds_client *mdsc,
        if (session->s_state >= CEPH_MDS_SESSION_CLOSING)
                goto done;
 
-       flush_write_caps(mdsc, session);
+       flush_write_caps(mdsc, session, 1);
        
        session->s_state = CEPH_MDS_SESSION_CLOSING;
        msg = create_session_msg(CEPH_SESSION_REQUEST_CLOSE,